diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 472d18e73da78..cba8afbb54f0f 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -33,18 +33,6 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number }} cancel-in-progress: true - -env: - # LLVM POST-BRANCH bump version - # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" - # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_HEAD_VERSION: "19" # Used compiler, update POST-BRANCH. - LLVM_PREVIOUS_VERSION: "18" - LLVM_OLDEST_VERSION: "17" - GCC_STABLE_VERSION: "13" - LLVM_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer-19" - CLANG_CRASH_DIAGNOSTICS_DIR: "crash_diagnostics" - jobs: stage1: if: github.repository_owner == 'llvm' diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 2880bfd03be78..320623cfa15af 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -170,6 +170,9 @@ class DataAggregator : public DataReader { std::string BuildIDBinaryName; /// Memory map info for a single file as recorded in perf.data + /// When a binary has multiple text segments, the Size is computed as the + /// difference of the last address of these segments from the BaseAddress. + /// The base addresses of all text segments must be the same. struct MMapInfo { uint64_t BaseAddress{0}; /// Base address of the mapped binary. uint64_t MMapAddress{0}; /// Address of the executable segment. @@ -493,6 +496,11 @@ class DataAggregator : public DataReader { /// and return a file name matching a given \p FileBuildID. std::optional getFileNameForBuildID(StringRef FileBuildID); + /// Get a constant reference to the parsed binary mmap entries. 
+ const std::unordered_map &getBinaryMMapInfo() { + return BinaryMMapInfo; + } + friend class YAMLProfileWriter; }; } // namespace bolt diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 697cac9fbcaa0..2b02086e3e0c9 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -95,6 +95,12 @@ cl::opt ReadPreAggregated( "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), cl::cat(AggregatorCategory)); +cl::opt + ReadPerfEvents("perf-script-events", + cl::desc("skip perf event collection by supplying a " + "perf-script output in a textual format"), + cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory)); + static cl::opt TimeAggregator("time-aggr", cl::desc("time BOLT aggregator"), @@ -167,8 +173,9 @@ void DataAggregator::findPerfExecutable() { void DataAggregator::start() { outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; - // Don't launch perf for pre-aggregated files - if (opts::ReadPreAggregated) + // Don't launch perf for pre-aggregated files or when perf input is specified + // by the user. 
+ if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty()) return; findPerfExecutable(); @@ -464,6 +471,13 @@ void DataAggregator::filterBinaryMMapInfo() { int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, PerfProcessErrorCallbackTy Callback) { + if (!opts::ReadPerfEvents.empty()) { + outs() << "PERF2BOLT: using pre-processed perf events for '" << Name + << "' (perf-script-events)\n"; + ParsingBuf = opts::ReadPerfEvents; + return 0; + } + std::string Error; outs() << "PERF2BOLT: waiting for perf " << Name << " collection to finish...\n"; @@ -2056,15 +2070,6 @@ std::error_code DataAggregator::parseMMapEvents() { if (FileMMapInfo.first == "(deleted)") continue; - // Consider only the first mapping of the file for any given PID - auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); - bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) { - return MI.second.PID == FileMMapInfo.second.PID; - }); - - if (PIDExists) - continue; - GlobalMMapInfo.insert(FileMMapInfo); } @@ -2116,12 +2121,22 @@ std::error_code DataAggregator::parseMMapEvents() { << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) << ". Ignoring profile data for this mapping\n"; continue; - } else { - MMapInfo.BaseAddress = *BaseAddress; } + MMapInfo.BaseAddress = *BaseAddress; } - BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); + // Try to add MMapInfo to the map and update its size. Large binaries may + // span to multiple text segments, so the mapping is inserted only on the + // first occurrence. + if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second) + assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress && + "Base address on multiple segment mappings should match"); + + // Update mapping size. 
+ const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size; + const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress; + if (Size > BinaryMMapInfo[MMapInfo.PID].Size) + BinaryMMapInfo[MMapInfo.PID].Size = Size; } if (BinaryMMapInfo.empty()) { diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index bad7108dad0b7..208cf6ced7358 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS add_bolt_unittest(CoreTests BinaryContext.cpp MCPlusBuilder.cpp + MemoryMaps.cpp DynoStats.cpp DISABLE_LLVM_LINK_LLVM_DYLIB @@ -17,6 +18,8 @@ target_link_libraries(CoreTests PRIVATE LLVMBOLTCore LLVMBOLTRewrite + LLVMBOLTProfile + LLVMTestingSupport ) foreach (tgt ${BOLT_TARGETS_TO_BUILD}) diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp new file mode 100644 index 0000000000000..9b5769d051cb6 --- /dev/null +++ b/bolt/unittests/Core/MemoryMaps.cpp @@ -0,0 +1,142 @@ +//===- bolt/unittest/Core/MemoryMaps.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/BinaryContext.h" +#include "bolt/Profile/DataAggregator.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; +using namespace bolt; + +namespace opts { +extern cl::opt ReadPerfEvents; +} // namespace opts + +namespace { + +/// Perform checks on memory map events normally captured in perf. 
Tests use +/// the 'opts::ReadPerfEvents' flag to emulate these events, passing a custom +/// 'perf script' output to DataAggregator. +struct MemoryMapsTester : public testing::TestWithParam { + void SetUp() override { + initalizeLLVM(); + prepareElf(); + initializeBOLT(); + } + +protected: + void initalizeLLVM() { + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + } + + void prepareElf() { + memcpy(ElfBuf, "\177ELF", 4); + ELF64LE::Ehdr *EHdr = reinterpret_cast(ElfBuf); + EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64; + EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB; + EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64; + MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF"); + ObjFile = cantFail(ObjectFile::createObjectFile(Source)); + } + + void initializeBOLT() { + Relocation::Arch = ObjFile->makeTriple().getArch(); + BC = cantFail(BinaryContext::createBinaryContext( + ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true, + DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()})); + ASSERT_FALSE(!BC); + } + + char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {}; + std::unique_ptr ObjFile; + std::unique_ptr BC; +}; +} // namespace + +#ifdef X86_AVAILABLE + +INSTANTIATE_TEST_SUITE_P(X86, MemoryMapsTester, + ::testing::Values(Triple::x86_64)); + +#endif + +#ifdef AARCH64_AVAILABLE + +INSTANTIATE_TEST_SUITE_P(AArch64, MemoryMapsTester, + ::testing::Values(Triple::aarch64)); + +#endif + +/// Check that the correct mmap size is computed when we have multiple text +/// segment mappings. 
+TEST_P(MemoryMapsTester, ParseMultipleSegments) { + const int Pid = 1234; + StringRef Filename = "BINARY"; + opts::ReadPerfEvents = formatv( + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n" + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n", + Pid, Filename); + + BC->SegmentMapInfo[0x11da000] = + SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true}; + BC->SegmentMapInfo[0x31d0000] = + SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000, true}; + + DataAggregator DA(""); + BC->setFilename(Filename); + Error Err = DA.preprocessProfile(*BC); + + // Ignore errors from perf2bolt when parsing memory events later on. + ASSERT_THAT_ERROR(std::move(Err), Succeeded()); + + auto &BinaryMMapInfo = DA.getBinaryMMapInfo(); + auto El = BinaryMMapInfo.find(Pid); + // Check that memory mapping is present and has the expected size. + ASSERT_NE(El, BinaryMMapInfo.end()); + ASSERT_EQ(El->second.Size, static_cast(0xb1d0000)); +} + +/// Check that DataAggregator aborts when pre-processing an input binary +/// with multiple text segments that have different base addresses. +TEST_P(MemoryMapsTester, MultipleSegmentsMismatchedBaseAddress) { + const int Pid = 1234; + StringRef Filename = "BINARY"; + opts::ReadPerfEvents = formatv( + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n" + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n", + Pid, Filename); + + BC->SegmentMapInfo[0x11da000] = + SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true}; + // Using '0x31d0fff' FileOffset which triggers a different base address + // for this second text segment. 
+ BC->SegmentMapInfo[0x31d0000] = + SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0fff, 0x3000000, 0x200000, true}; + + DataAggregator DA(""); + BC->setFilename(Filename); + ASSERT_DEATH( + { Error Err = DA.preprocessProfile(*BC); }, + "Base address on multiple segment mappings should match"); +} diff --git a/bolt/utils/bughunter.sh b/bolt/utils/bughunter.sh index 49831cddfdbdd..c5dddc41fb41f 100755 --- a/bolt/utils/bughunter.sh +++ b/bolt/utils/bughunter.sh @@ -131,7 +131,7 @@ if [[ $FAIL -eq "0" ]]; then fi else echo "Did it pass? Type the return code [0 = pass, 1 = fail]" - read -n1 PASS + read -n1 FAIL fi if [[ $FAIL -eq "0" ]] ; then echo " Warning: optimized binary passes." @@ -205,7 +205,7 @@ while [[ "$CONTINUE" -ne "0" ]] ; do echo " OPTIMIZED_BINARY failure=$FAIL" else echo "Did it pass? Type the return code [0 = pass, 1 = fail]" - read -n1 PASS + read -n1 FAIL fi else FAIL=1 diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp index e329588290cd4..2b2d80ea9346b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp @@ -303,7 +303,7 @@ void InfiniteLoopCheck::check(const MatchFinder::MatchResult &Result) { } } - if (ExprMutationAnalyzer::isUnevaluated(LoopStmt, *LoopStmt, *Result.Context)) + if (ExprMutationAnalyzer::isUnevaluated(LoopStmt, *Result.Context)) return; if (isAtLeastOneCondVarChanged(Func, LoopStmt, Cond, Result.Context)) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp index 6a6e620a4387b..f615976c7edb6 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp @@ -13,79 +13,88 @@ using namespace clang::ast_matchers; namespace 
clang::tidy::cppcoreguidelines { -namespace { -AST_MATCHER(FieldDecl, isMemberOfLambda) { - return Node.getParent()->isLambda(); +static bool isCopyConstructible(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForCopyConstructor() && + Node.needsImplicitCopyConstructor()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isCopyConstructible(*BRD)) + return false; + } + } + if (Node.hasSimpleCopyConstructor()) + return true; + for (CXXConstructorDecl const *Ctor : Node.ctors()) + if (Ctor->isCopyConstructor()) + return !Ctor->isDeleted(); + return false; } -struct MemberFunctionInfo { - bool Declared{}; - bool Deleted{}; -}; - -struct MemberFunctionPairInfo { - MemberFunctionInfo Copy{}; - MemberFunctionInfo Move{}; -}; - -MemberFunctionPairInfo getConstructorsInfo(CXXRecordDecl const &Node) { - MemberFunctionPairInfo Constructors{}; - - for (CXXConstructorDecl const *Ctor : Node.ctors()) { - if (Ctor->isCopyConstructor()) { - Constructors.Copy.Declared = true; - if (Ctor->isDeleted()) - Constructors.Copy.Deleted = true; - } - if (Ctor->isMoveConstructor()) { - Constructors.Move.Declared = true; - if (Ctor->isDeleted()) - Constructors.Move.Deleted = true; +static bool isMoveConstructible(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForMoveConstructor() && + Node.needsImplicitMoveConstructor()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isMoveConstructible(*BRD)) + return false; } } - - return Constructors; + if (Node.hasSimpleMoveConstructor()) + return true; + for (CXXConstructorDecl const *Ctor : Node.ctors()) + if (Ctor->isMoveConstructor()) + return !Ctor->isDeleted(); + return false; } -MemberFunctionPairInfo getAssignmentsInfo(CXXRecordDecl const &Node) { - MemberFunctionPairInfo Assignments{}; - - for 
(CXXMethodDecl const *Method : Node.methods()) { - if (Method->isCopyAssignmentOperator()) { - Assignments.Copy.Declared = true; - if (Method->isDeleted()) - Assignments.Copy.Deleted = true; +static bool isCopyAssignable(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForCopyAssignment() && + Node.needsImplicitCopyAssignment()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isCopyAssignable(*BRD)) + return false; } + } + if (Node.hasSimpleCopyAssignment()) + return true; + for (CXXMethodDecl const *Method : Node.methods()) + if (Method->isCopyAssignmentOperator()) + return !Method->isDeleted(); + return false; +} - if (Method->isMoveAssignmentOperator()) { - Assignments.Move.Declared = true; - if (Method->isDeleted()) - Assignments.Move.Deleted = true; +static bool isMoveAssignable(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForMoveAssignment() && + Node.needsImplicitMoveAssignment()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isMoveAssignable(*BRD)) + return false; } } - - return Assignments; + if (Node.hasSimpleMoveAssignment()) + return true; + for (CXXMethodDecl const *Method : Node.methods()) + if (Method->isMoveAssignmentOperator()) + return !Method->isDeleted(); + return false; } -AST_MATCHER(CXXRecordDecl, isCopyableOrMovable) { - MemberFunctionPairInfo Constructors = getConstructorsInfo(Node); - MemberFunctionPairInfo Assignments = getAssignmentsInfo(Node); +namespace { - if (Node.hasSimpleCopyConstructor() || - (Constructors.Copy.Declared && !Constructors.Copy.Deleted)) - return true; - if (Node.hasSimpleMoveConstructor() || - (Constructors.Move.Declared && !Constructors.Move.Deleted)) - return true; - if (Node.hasSimpleCopyAssignment() || - (Assignments.Copy.Declared && 
!Assignments.Copy.Deleted)) - return true; - if (Node.hasSimpleMoveAssignment() || - (Assignments.Move.Declared && !Assignments.Move.Deleted)) - return true; +AST_MATCHER(FieldDecl, isMemberOfLambda) { + return Node.getParent()->isLambda(); +} - return false; +AST_MATCHER(CXXRecordDecl, isCopyableOrMovable) { + return isCopyConstructible(Node) || isMoveConstructible(Node) || + isCopyAssignable(Node) || isMoveAssignable(Node); } } // namespace diff --git a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp index d900978f65a94..71eb2d94cd4f2 100644 --- a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp @@ -8,14 +8,12 @@ #include "UseInternalLinkageCheck.h" #include "../utils/FileExtensionsUtils.h" -#include "../utils/LexerUtils.h" #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersMacros.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" -#include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" #include "llvm/ADT/STLExtras.h" @@ -47,6 +45,8 @@ namespace { AST_MATCHER(Decl, isFirstDecl) { return Node.isFirstDecl(); } +AST_MATCHER(FunctionDecl, hasBody) { return Node.hasBody(); } + static bool isInMainFile(SourceLocation L, SourceManager &SM, const FileExtensionsSet &HeaderFileExtensions) { for (;;) { @@ -103,7 +103,7 @@ void UseInternalLinkageCheck::registerMatchers(MatchFinder *Finder) { // 4. 
friend hasAncestor(friendDecl())))); Finder->addMatcher( - functionDecl(Common, unless(cxxMethodDecl()), unless(isMain())) + functionDecl(Common, hasBody(), unless(cxxMethodDecl()), unless(isMain())) .bind("fn"), this); Finder->addMatcher(varDecl(Common, hasGlobalStorage()).bind("var"), this); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index dcfe68e020fc9..fec2c20206bc4 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -207,6 +207,10 @@ Changes in existing checks fix false positive that floating point variable is only used in increment expression. +- Improved :doc:`cppcoreguidelines-avoid-const-or-ref-data-members + ` check to + avoid false positives when detecting a templated class with inheritance. + - Improved :doc:`cppcoreguidelines-init-variables ` check by fixing the insertion location for function pointers. @@ -228,6 +232,11 @@ Changes in existing checks ` check to avoid false positive for C++23 deducing this. +- Improved :doc:`misc-use-internal-linkage + ` check to insert ``static`` + keyword before type qualifiers such as ``const`` and ``volatile`` and fix + false positives for function declaration without body. + - Improved :doc:`modernize-avoid-c-arrays ` check to suggest using ``std::span`` as a replacement for parameters of incomplete C array type in @@ -237,10 +246,6 @@ Changes in existing checks ` check to fix false positive when using loop variable in initializer of lambda capture. -- Improved :doc:`misc-use-internal-linkage - ` check to insert ``static`` keyword - before type qualifiers such as ``const`` and ``volatile``. 
- - Improved :doc:`modernize-min-max-use-initializer-list ` check by fixing a false positive when only an implicit conversion happened inside an diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst index 7147af9a7919b..b8bbcc6270610 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst @@ -16,7 +16,7 @@ Example: int v1; // can be marked as static - void fn1(); // can be marked as static + void fn1() {} // can be marked as static namespace { // already in anonymous namespace @@ -26,6 +26,9 @@ Example: // already declared as extern extern int v2; + void fn3(); // without function body in all declaration, maybe external linkage + void fn3(); + Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp index e3864be134da3..19da88300aec4 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp @@ -285,6 +285,28 @@ struct InheritBothFromNonCopyableAndNonMovable : NonCopyable, NonMovable int& x; // OK, non copyable nor movable }; +template struct TemplateInheritFromNonCopyable : NonCopyable +{ + int& x; + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: member 'x' of type 'int &' is a reference +}; + +template struct TemplateInheritFromNonMovable : NonMovable +{ + int& x; + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: member 'x' of type 'int &' is a reference +}; + +template struct TemplateInheritFromNonCopyableNonMovable : NonCopyableNonMovable +{ + int& x; // OK, non copyable nor movable +}; + +template struct 
TemplateInheritBothFromNonCopyableAndNonMovable : NonCopyable, NonMovable +{ + int& x; // OK, non copyable nor movable +}; + // Test composition struct ContainsNonCopyable { diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp index 8dc739da3a273..bf0d2c2513e56 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp @@ -13,59 +13,59 @@ void func_template() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_template' // CHECK-FIXES: static void func_template() {} -void func_cpp_inc(); +void func_cpp_inc() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc' -// CHECK-FIXES: static void func_cpp_inc(); +// CHECK-FIXES: static void func_cpp_inc() {} -int* func_cpp_inc_return_ptr(); +int* func_cpp_inc_return_ptr() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc_return_ptr' -// CHECK-FIXES: static int* func_cpp_inc_return_ptr(); +// CHECK-FIXES: static int* func_cpp_inc_return_ptr() {} -const int* func_cpp_inc_return_const_ptr(); +const int* func_cpp_inc_return_const_ptr() {} // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_const_ptr' -// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr(); +// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr() {} -int const* func_cpp_inc_return_ptr_const(); +int const* func_cpp_inc_return_ptr_const() {} // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_ptr_const' -// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const(); +// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const() {} -int * const func_cpp_inc_return_const(); +int * const func_cpp_inc_return_const() {} // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: function 'func_cpp_inc_return_const' -// CHECK-FIXES: 
static int * const func_cpp_inc_return_const(); +// CHECK-FIXES: static int * const func_cpp_inc_return_const() {} -volatile const int* func_cpp_inc_return_volatile_const_ptr(); +volatile const int* func_cpp_inc_return_volatile_const_ptr() {} // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: function 'func_cpp_inc_return_volatile_const_ptr' -// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr(); +// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr() {} -[[nodiscard]] void func_nodiscard(); +[[nodiscard]] void func_nodiscard() {} // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: function 'func_nodiscard' -// CHECK-FIXES: {{\[\[nodiscard\]\]}} static void func_nodiscard(); +// CHECK-FIXES: {{\[\[nodiscard\]\]}} static void func_nodiscard() {} #define NDS [[nodiscard]] #define NNDS -NDS void func_nds(); +NDS void func_nds() {} // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: function 'func_nds' -// CHECK-FIXES: NDS static void func_nds(); +// CHECK-FIXES: NDS static void func_nds() {} -NNDS void func_nnds(); +NNDS void func_nnds() {} // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: function 'func_nnds' -// CHECK-FIXES: NNDS static void func_nnds(); +// CHECK-FIXES: NNDS static void func_nnds() {} #include "func_cpp.inc" -void func_h_inc(); +void func_h_inc() {} struct S { void method(); }; void S::method() {} -void func_header(); -extern void func_extern(); -static void func_static(); +void func_header() {} +extern void func_extern() {} +static void func_static() {} namespace { -void func_anonymous_ns(); +void func_anonymous_ns() {} } // namespace int main(int argc, const char*argv[]) {} @@ -75,3 +75,13 @@ void func_extern_c_1() {} } extern "C" void func_extern_c_2() {} + +namespace gh117488 { +void func_with_body(); +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_with_body' +// CHECK-FIXES: static void func_with_body(); +void func_with_body() {} + +void func_without_body(); +void func_without_body(); +} diff --git 
a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst index 7afad5b15b2d5..e17d741b0a00e 100644 --- a/clang/docs/ClangFormat.rst +++ b/clang/docs/ClangFormat.rst @@ -33,7 +33,7 @@ to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code. Clang-format options: --Werror - If set, changes formatting warnings to errors - --Wno-error= - If set don't error out on the specified warning type. + --Wno-error= - If set, don't error out on the specified warning type. =unknown - If set, unknown format options are only warned about. This can be used to enable formatting, even if the configuration contains unknown (newer) options. diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 3f996ceaff156..481362dba3f51 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -290,7 +290,7 @@ implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | memory management | changes to omp_alloctrait_key enum | :none:`unclaimed` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory model | seq_cst clause on flush construct | :none:`unclaimed` | | +| memory model | seq_cst clause on flush construct | :good:`done` | https://github.com/llvm/llvm-project/pull/114072 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | misc | 'omp_all_memory' keyword and use in 'depend' clause | :good:`done` | D125828, D126321 | 
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8bd06fadfdc98..954fe61f3d1d6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -332,6 +332,8 @@ C23 Feature Support - Clang now supports `N3029 `_ Improved Normal Enumerations. - Clang now officially supports `N3030 `_ Enhancements to Enumerations. Clang already supported it as an extension, so there were no changes to compiler behavior. +- Fixed the value of ``BOOL_WIDTH`` in ```` to return ``1`` + explicitly, as mandated by the standard. Fixes #GH117348 Non-comprehensive list of changes in this release ------------------------------------------------- @@ -471,6 +473,8 @@ Attribute Changes in Clang - Clang now supports ``[[clang::lifetime_capture_by(X)]]``. Similar to lifetimebound, this can be used to specify when a reference to a function parameter is captured by another capturing entity ``X``. +- The ``target_version`` attribute is now only supported for AArch64 and RISC-V architectures. + Improvements to Clang's diagnostics ----------------------------------- @@ -714,6 +718,7 @@ Bug Fixes to C++ Support assumption if they also occur inside of a dependent lambda. (#GH114787) - Clang now uses valid deduced type locations when diagnosing functions with trailing return type missing placeholder return type. (#GH78694) +- Fixed a bug where bounds of partially expanded pack indexing expressions were checked too early. 
(#GH116105) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/SanitizerCoverage.rst b/clang/docs/SanitizerCoverage.rst index 45ad03cb43774..6ea1d14829005 100644 --- a/clang/docs/SanitizerCoverage.rst +++ b/clang/docs/SanitizerCoverage.rst @@ -385,6 +385,20 @@ Users need to implement a single function to capture the CF table at startup: // the collected control flow. } +Gated Trace Callbacks +===================== + +Gate the invocation of the tracing callbacks with +``-sanitizer-coverage-gated-trace-callbacks``. + +When this option is enabled, the instrumentation will not call into the +runtime-provided callbacks for tracing, thus only incurring in a trivial +branch without going through a function call. + +It is up to the runtime to toggle the value of the global variable in order to +enable tracing. + +This option is only supported for trace-pc-guard and trace-cmp. Disabling instrumentation with ``__attribute__((no_sanitize("coverage")))`` =========================================================================== diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 696a574833dad..1a24b8857674c 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -4390,17 +4390,17 @@ class PackIndexingExpr final unsigned TransformedExpressions : 31; LLVM_PREFERRED_TYPE(bool) - unsigned ExpandedToEmptyPack : 1; + unsigned FullySubstituted : 1; PackIndexingExpr(QualType Type, SourceLocation EllipsisLoc, SourceLocation RSquareLoc, Expr *PackIdExpr, Expr *IndexExpr, ArrayRef SubstitutedExprs = {}, - bool ExpandedToEmptyPack = false) + bool FullySubstituted = false) : Expr(PackIndexingExprClass, Type, VK_LValue, OK_Ordinary), EllipsisLoc(EllipsisLoc), RSquareLoc(RSquareLoc), SubExprs{PackIdExpr, IndexExpr}, TransformedExpressions(SubstitutedExprs.size()), - ExpandedToEmptyPack(ExpandedToEmptyPack) { + FullySubstituted(FullySubstituted) { auto *Exprs = getTrailingObjects(); 
std::uninitialized_copy(SubstitutedExprs.begin(), SubstitutedExprs.end(), @@ -4424,12 +4424,16 @@ class PackIndexingExpr final SourceLocation RSquareLoc, Expr *PackIdExpr, Expr *IndexExpr, std::optional Index, ArrayRef SubstitutedExprs = {}, - bool ExpandedToEmptyPack = false); + bool FullySubstituted = false); static PackIndexingExpr *CreateDeserialized(ASTContext &Context, unsigned NumTransformedExprs); + bool isFullySubstituted() const { return FullySubstituted; } + /// Determine if the expression was expanded to empty. - bool expandsToEmptyPack() const { return ExpandedToEmptyPack; } + bool expandsToEmptyPack() const { + return isFullySubstituted() && TransformedExpressions == 0; + } /// Determine the location of the 'sizeof' keyword. SourceLocation getEllipsisLoc() const { return EllipsisLoc; } diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 00c87e71bde31..d2f5267e4da5e 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2670,8 +2670,8 @@ class OMPCompareClause final : public OMPClause { } }; -/// This represents 'seq_cst' clause in the '#pragma omp atomic' -/// directive. +/// This represents 'seq_cst' clause in the '#pragma omp atomic|flush' +/// directives. /// /// \code /// #pragma omp atomic seq_cst diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 1ed5c22361ca6..90a52b1dcbf62 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -5922,12 +5922,12 @@ class PackIndexingType final unsigned Size : 31; LLVM_PREFERRED_TYPE(bool) - unsigned ExpandsToEmptyPack : 1; + unsigned FullySubstituted : 1; protected: friend class ASTContext; // ASTContext creates these. 
PackIndexingType(const ASTContext &Context, QualType Canonical, - QualType Pattern, Expr *IndexExpr, bool ExpandsToEmptyPack, + QualType Pattern, Expr *IndexExpr, bool FullySubstituted, ArrayRef Expansions = {}); public: @@ -5951,7 +5951,9 @@ class PackIndexingType final bool hasSelectedType() const { return getSelectedIndex() != std::nullopt; } - bool expandsToEmptyPack() const { return ExpandsToEmptyPack; } + bool isFullySubstituted() const { return FullySubstituted; } + + bool expandsToEmptyPack() const { return isFullySubstituted() && Size == 0; } ArrayRef getExpansions() const { return {getExpansionsPtr(), Size}; @@ -5965,10 +5967,10 @@ class PackIndexingType final if (hasSelectedType()) getSelectedType().Profile(ID); else - Profile(ID, Context, getPattern(), getIndexExpr(), expandsToEmptyPack()); + Profile(ID, Context, getPattern(), getIndexExpr(), isFullySubstituted()); } static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, - QualType Pattern, Expr *E, bool ExpandsToEmptyPack); + QualType Pattern, Expr *E, bool FullySubstituted); private: const QualType *getExpansionsPtr() const { diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index a8b9c920b617c..6f1a76bd18fb5 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -473,12 +473,12 @@ let Class = PackIndexingType in { def : Property<"indexExpression", ExprRef> { let Read = [{ node->getIndexExpr() }]; } - def : Property<"expandsToEmptyPack", Bool> { - let Read = [{ node->expandsToEmptyPack() }]; + def : Property<"isFullySubstituted", Bool> { + let Read = [{ node->isFullySubstituted() }]; } def : Creator<[{ - return ctx.getPackIndexingType(pattern, indexExpression, expandsToEmptyPack); + return ctx.getPackIndexingType(pattern, indexExpression, isFullySubstituted); }]>; } diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h 
b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index c7a5b016c949d..7442f4aad531b 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -47,8 +47,6 @@ class ExprMutationAnalyzer { const Stmt *findPointeeMutation(const Expr *Exp); const Stmt *findPointeeMutation(const Decl *Dec); - static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, - ASTContext &Context); private: using MutationFinder = const Stmt *(Analyzer::*)(const Expr *); @@ -58,8 +56,6 @@ class ExprMutationAnalyzer { Memoized::ResultMap &MemoizedResults); const Stmt *tryEachDeclRef(const Decl *Dec, MutationFinder Finder); - bool isUnevaluated(const Expr *Exp); - const Stmt *findExprMutation(ArrayRef Matches); const Stmt *findDeclMutation(ArrayRef Matches); const Stmt * @@ -83,6 +79,10 @@ class ExprMutationAnalyzer { ExprMutationAnalyzer(const Stmt &Stm, ASTContext &Context) : Memorized(), A(Stm, Context, Memorized) {} + /// check whether stmt is unevaluated. mutation analyzer will ignore the + /// content in unevaluated stmt. 
+ static bool isUnevaluated(const Stmt *Stm, ASTContext &Context); + bool isMutated(const Expr *Exp) { return findMutation(Exp) != nullptr; } bool isMutated(const Decl *Dec) { return findMutation(Dec) != nullptr; } const Stmt *findMutation(const Expr *Exp) { return A.findMutation(Exp); } @@ -101,11 +101,6 @@ class ExprMutationAnalyzer { return A.findPointeeMutation(Dec); } - static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, - ASTContext &Context) { - return Analyzer::isUnevaluated(Smt, Stm, Context); - } - private: Memoized Memorized; Analyzer A; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 634253d003256..14009826f2c55 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3297,7 +3297,7 @@ def Target : InheritableAttr { }]; } -def TargetVersion : InheritableAttr { +def TargetVersion : InheritableAttr, TargetSpecificAttr> { let Spellings = [GCC<"target_version">]; let Args = [StringArgument<"NamesStr">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 83c90b3d6e681..eaff744924805 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4882,7 +4882,6 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } - def HLSLSelect : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_select"]; let Attributes = [NoThrow, Const]; @@ -4907,6 +4906,12 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLBufferUpdateCounter : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_buffer_update_counter"]; + let Attributes = [NoThrow]; + let Prototype = "uint32_t(...)"; +} + def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_splitdouble"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 51a5b1dbad495..49304d12d6d70 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -263,7 +263,7 @@ TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940 TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot10-insts") TARGET_BUILTIN(__builtin_amdgcn_fdot2_f16_f16, "hV2hV2hh", "nc", "dot9-insts") TARGET_BUILTIN(__builtin_amdgcn_fdot2_bf16_bf16, "sV2sV2ss", "nc", "dot9-insts") -TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, "fV2sV2sfIb", "nc", "dot9-insts") +TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, "fV2sV2sfIb", "nc", "dot12-insts") TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts") TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts") TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts") @@ -276,6 +276,7 @@ TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "dot11-insts") TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "dot11-insts") TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "dot11-insts") TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "dot11-insts") +TARGET_BUILTIN(__builtin_amdgcn_fdot2c_f32_bf16, "fV2yV2yfIb", "nc", "dot13-insts") //===----------------------------------------------------------------------===// // GFX10+ only builtins. 
@@ -459,6 +460,20 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiI TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_permlane16_swap, "V2UiUiUiIbIb", "nc", "permlane16-swap") +TARGET_BUILTIN(__builtin_amdgcn_permlane32_swap, "V2UiUiUiIbIb", "nc", "permlane32-swap") + +TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr4_b64_v2i32, "V2iV2i*3", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr6_b96_v3i32, "V3iV3i*3", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr8_b64_v2i32, "V2iV2i*3", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4i16, "V4sV4s*3", "nc", "gfx950-insts") + +TARGET_BUILTIN(__builtin_amdgcn_ashr_pk_i8_i32, "UsUiUiUi", "nc", "ashr-pk-insts") +TARGET_BUILTIN(__builtin_amdgcn_ashr_pk_u8_i32, "UsUiUiUi", "nc", "ashr-pk-insts") + +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_2xpk16_fp6_f32, "V6UiV16fV16ff", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_2xpk16_bf6_f32, "V6UiV16fV16ff", "nc", "gfx950-insts") + //===----------------------------------------------------------------------===// // GFX12+ only builtins. 
//===----------------------------------------------------------------------===// @@ -551,6 +566,10 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_f16, "V6UiV32hf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf6_f16, "V6UiV32hf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_bf16, "V6UiV32yf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf6_bf16, "V6UiV32yf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index eb05a6a77978a..834e588c18e37 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7287,6 +7287,8 @@ def err_typecheck_illegal_increment_decrement : Error< "cannot %select{decrement|increment}1 value of type %0">; def err_typecheck_expect_int : Error< "used type %0 where integer is required">; +def err_typecheck_expect_hlsl_resource : Error< + "used type %0 where __hlsl_resource_t is required">; def err_typecheck_arithmetic_incomplete_or_sizeless_type : Error< "arithmetic on a pointer to %select{an incomplete|sizeless}0 type %1">; def err_typecheck_pointer_arith_function_type : Error< @@ -11396,7 +11398,7 @@ def err_omp_atomic_weak_no_equality : Error<"expected '==' operator for 'weak' c def err_omp_atomic_several_clauses : Error< "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< - "directive '#pragma omp %0' 
cannot contain more than one %select{'seq_cst', 'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">; + "directive '#pragma omp %0' cannot contain more than one 'seq_cst',%select{ 'relaxed',|}1 'acq_rel', 'acquire' or 'release' clause">; def err_omp_atomic_incompatible_mem_order_clause : Error< "directive '#pragma omp atomic%select{ %0|}1' cannot be used with '%2' clause">; def note_omp_previous_mem_order_clause : Note< @@ -12528,6 +12530,10 @@ def warn_attr_min_eq_max: Warning< def err_hlsl_attribute_number_arguments_insufficient_shader_model: Error< "attribute %0 with %1 arguments requires shader model %2 or greater">; +def err_hlsl_expect_arg_const_int_one_or_neg_one: Error< + "argument %0 must be constant integer 1 or -1">; +def err_invalid_hlsl_resource_type: Error< + "invalid __hlsl_resource_t type attributes">; // Layout randomization diagnostics. def err_non_designated_init_used : Error< diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index ea6b414618c1d..056fad2cc0ff8 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -4970,12 +4970,11 @@ struct FormatStyle { /// \version 12 std::vector StatementAttributeLikeMacros; - /// A vector of macros that should be interpreted as complete - /// statements. + /// A vector of macros that should be interpreted as complete statements. /// - /// Typical macros are expressions, and require a semi-colon to be - /// added; sometimes this is not the case, and this allows to make - /// clang-format aware of such cases. + /// Typical macros are expressions and require a semicolon to be added. + /// Sometimes this is not the case, and this allows to make clang-format aware + /// of such cases. 
/// /// For example: Q_UNUSED /// \version 8 diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5fe23e0d0efd3..24abd5d95dd84 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -14257,7 +14257,7 @@ class Sema final : public SemaBase { SourceLocation EllipsisLoc, Expr *IndexExpr, SourceLocation RSquareLoc, ArrayRef ExpandedExprs = {}, - bool EmptyPack = false); + bool FullySubstituted = false); /// Handle a C++1z fold-expression: ( expr op ... op expr ). ExprResult ActOnCXXFoldExpr(Scope *S, SourceLocation LParenLoc, Expr *LHS, diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 4f5d14cbd59bb..f9e08b70d6ab0 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -1087,10 +1087,6 @@ void APValue::MakeArray(unsigned InitElts, unsigned Size) { Kind = Array; } -MutableArrayRef -setLValueUninit(APValue::LValueBase B, const CharUnits &O, unsigned Size, - bool OnePastTheEnd, bool IsNullPtr); - MutableArrayRef APValue::setMemberPointerUninit(const ValueDecl *Member, bool IsDerivedMember, unsigned Size) { diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp index bdc713ca3e791..f7ee0fb3ee92d 100644 --- a/clang/lib/AST/ASTConcept.cpp +++ b/clang/lib/AST/ASTConcept.cpp @@ -22,11 +22,11 @@ static void CreateUnsatisfiedConstraintRecord(const ASTContext &C, const UnsatisfiedConstraintRecord &Detail, UnsatisfiedConstraintRecord *TrailingObject) { - if (Detail.is()) - new (TrailingObject) UnsatisfiedConstraintRecord(Detail.get()); + if (auto *E = dyn_cast(Detail)) + new (TrailingObject) UnsatisfiedConstraintRecord(E); else { auto &SubstitutionDiagnostic = - *Detail.get *>(); + *cast *>(Detail); StringRef Message = C.backupStr(SubstitutionDiagnostic.second); auto *NewSubstDiag = new (C) std::pair( SubstitutionDiagnostic.first, Message); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 23df7878a3bf2..80e8c5b9df58e 100644 --- 
a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -374,10 +374,10 @@ static const Decl &adjustDeclToTemplate(const Decl &D) { llvm::PointerUnion PU = CTSD->getSpecializedTemplateOrPartial(); - return PU.is() - ? *static_cast(PU.get()) + return isa(PU) + ? *static_cast(cast(PU)) : *static_cast( - PU.get()); + cast(PU)); } // Class is instantiated from a member definition of a class template? @@ -6223,13 +6223,11 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr, ArrayRef Expansions, int Index) const { QualType Canonical; - bool ExpandsToEmptyPack = FullySubstituted && Expansions.empty(); if (FullySubstituted && Index != -1) { Canonical = getCanonicalType(Expansions[Index]); } else { llvm::FoldingSetNodeID ID; - PackIndexingType::Profile(ID, *this, Pattern, IndexExpr, - ExpandsToEmptyPack); + PackIndexingType::Profile(ID, *this, Pattern, IndexExpr, FullySubstituted); void *InsertPos = nullptr; PackIndexingType *Canon = DependentPackIndexingTypes.FindNodeOrInsertPos(ID, InsertPos); @@ -6238,7 +6236,7 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr, PackIndexingType::totalSizeToAlloc(Expansions.size()), TypeAlignment); Canon = new (Mem) PackIndexingType(*this, QualType(), Pattern, IndexExpr, - ExpandsToEmptyPack, Expansions); + FullySubstituted, Expansions); DependentPackIndexingTypes.InsertNode(Canon, InsertPos); } Canonical = QualType(Canon, 0); @@ -6248,7 +6246,7 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr, Allocate(PackIndexingType::totalSizeToAlloc(Expansions.size()), TypeAlignment); auto *T = new (Mem) PackIndexingType(*this, Canonical, Pattern, IndexExpr, - ExpandsToEmptyPack, Expansions); + FullySubstituted, Expansions); Types.push_back(T); return QualType(T, 0); } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index baed141663543..a0cd57e2e5ee0 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp 
@@ -104,8 +104,8 @@ namespace clang { char ASTImportError::ID; template - SmallVector - getCanonicalForwardRedeclChain(Redeclarable* D) { + static SmallVector + getCanonicalForwardRedeclChain(Redeclarable *D) { SmallVector Redecls; for (auto *R : D->getFirstDecl()->redecls()) { if (R != D->getFirstDecl()) @@ -126,7 +126,7 @@ namespace clang { llvm_unreachable("Bad declaration kind"); } - void updateFlags(const Decl *From, Decl *To) { + static void updateFlags(const Decl *From, Decl *To) { // Check if some flags or attrs are new in 'From' and copy into 'To'. // FIXME: Other flags or attrs? if (From->isUsed(false) && !To->isUsed(false)) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 7cf2519d6a71f..f4cc284dfb6ab 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -448,6 +448,10 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { QualType SubExprTy = SubExpr->getType(); std::optional FromT = classify(SubExprTy); + // Casts from integer to vectors in C. 
+ if (FromT && CE->getType()->isVectorType()) + return this->emitBuiltinBitCast(CE); + std::optional ToT = classify(CE->getType()); if (!FromT || !ToT) return false; @@ -1642,22 +1646,8 @@ bool Compiler::VisitImplicitValueInitExpr( if (QT->isIncompleteArrayType()) return true; - if (QT->isArrayType()) { - const ArrayType *AT = QT->getAsArrayTypeUnsafe(); - assert(AT); - const auto *CAT = cast(AT); - size_t NumElems = CAT->getZExtSize(); - PrimType ElemT = classifyPrim(CAT->getElementType()); - - for (size_t I = 0; I != NumElems; ++I) { - if (!this->visitZeroInitializer(ElemT, CAT->getElementType(), E)) - return false; - if (!this->emitInitElem(ElemT, I, E)) - return false; - } - - return true; - } + if (QT->isArrayType()) + return this->visitZeroArrayInitializer(QT, E); if (const auto *ComplexTy = E->getType()->getAs()) { assert(Initializing); @@ -3916,18 +3906,9 @@ bool Compiler::visitZeroRecordInitializer(const Record *R, return false; } } else if (D->isCompositeArray()) { - const Record *ElemRecord = D->ElemDesc->ElemRecord; - assert(D->ElemDesc->ElemRecord); - for (uint32_t I = 0, N = D->getNumElems(); I != N; ++I) { - if (!this->emitConstUint32(I, E)) - return false; - if (!this->emitArrayElemPtr(PT_Uint32, E)) - return false; - if (!this->visitZeroRecordInitializer(ElemRecord, E)) - return false; - if (!this->emitPopPtr(E)) - return false; - } + // Can't be a vector or complex field. 
+ if (!this->visitZeroArrayInitializer(D->getType(), E)) + return false; } else if (D->isRecord()) { if (!this->visitZeroRecordInitializer(D->ElemRecord, E)) return false; @@ -3958,6 +3939,52 @@ bool Compiler::visitZeroRecordInitializer(const Record *R, return true; } +template +bool Compiler::visitZeroArrayInitializer(QualType T, const Expr *E) { + assert(T->isArrayType() || T->isAnyComplexType() || T->isVectorType()); + const ArrayType *AT = T->getAsArrayTypeUnsafe(); + QualType ElemType = AT->getElementType(); + size_t NumElems = cast(AT)->getZExtSize(); + + if (std::optional ElemT = classify(ElemType)) { + for (size_t I = 0; I != NumElems; ++I) { + if (!this->visitZeroInitializer(*ElemT, ElemType, E)) + return false; + if (!this->emitInitElem(*ElemT, I, E)) + return false; + } + return true; + } else if (ElemType->isRecordType()) { + const Record *R = getRecord(ElemType); + + for (size_t I = 0; I != NumElems; ++I) { + if (!this->emitConstUint32(I, E)) + return false; + if (!this->emitArrayElemPtr(PT_Uint32, E)) + return false; + if (!this->visitZeroRecordInitializer(R, E)) + return false; + if (!this->emitPopPtr(E)) + return false; + } + return true; + } else if (ElemType->isArrayType()) { + for (size_t I = 0; I != NumElems; ++I) { + if (!this->emitConstUint32(I, E)) + return false; + if (!this->emitArrayElemPtr(PT_Uint32, E)) + return false; + if (!this->visitZeroArrayInitializer(ElemType, E)) + return false; + if (!this->emitPopPtr(E)) + return false; + } + return true; + } + + return false; +} + template template bool Compiler::emitConst(T Value, PrimType Ty, const Expr *E) { @@ -4033,7 +4060,7 @@ unsigned Compiler::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty, // (int){12} in C. Consider using Expr::isTemporaryObject() instead // or isa(). 
Descriptor *D = P.createDescriptor(Src, Ty, Descriptor::InlineDescMD, IsConst, - Src.is()); + isa(Src)); Scope::Local Local = this->createLocal(D); if (auto *VD = dyn_cast_if_present(Src.dyn_cast())) Locals.insert({VD, Local}); @@ -6471,8 +6498,23 @@ bool Compiler::emitBuiltinBitCast(const CastExpr *E) { } // Get a pointer to the value-to-cast on the stack. - if (!this->visit(SubExpr)) - return false; + // For CK_LValueToRValueBitCast, this is always an lvalue and + // we later assume it to be one (i.e. a PT_Ptr). However, + // we call this function for other utility methods where + // a bitcast might be useful, so convert it to a PT_Ptr in that case. + if (SubExpr->isGLValue()) { + if (!this->visit(SubExpr)) + return false; + } else if (std::optional FromT = classify(SubExpr)) { + unsigned TempOffset = allocateLocalPrimitive( + SubExpr, *FromT, /*IsConst=*/true, /*IsExtended=*/false); + if (!this->visit(SubExpr)) + return false; + if (!this->emitSetLocal(*FromT, TempOffset, E)) + return false; + if (!this->emitGetPtrLocal(TempOffset, E)) + return false; + } if (!ToT || ToT == PT_Ptr) { if (!this->emitBitCastPtr(E)) diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index d1b624daba6b9..2a94f5ec76b6c 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -325,6 +325,7 @@ class Compiler : public ConstStmtVisitor, bool>, /// Emits a zero initializer. bool visitZeroInitializer(PrimType T, QualType QT, const Expr *E); bool visitZeroRecordInitializer(const Record *R, const Expr *E); + bool visitZeroArrayInitializer(QualType T, const Expr *E); /// Emits an APSInt constant. 
bool emitConst(const llvm::APSInt &Value, PrimType Ty, const Expr *E); diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp index 85522ffd32dcc..496c1dcef59b5 100644 --- a/clang/lib/AST/ByteCode/Disasm.cpp +++ b/clang/lib/AST/ByteCode/Disasm.cpp @@ -33,7 +33,7 @@ using namespace clang; using namespace clang::interp; -template inline T ReadArg(Program &P, CodePtr &OpPC) { +template inline static T ReadArg(Program &P, CodePtr &OpPC) { if constexpr (std::is_pointer_v) { uint32_t ID = OpPC.read(); return reinterpret_cast(P.getNativePointer(ID)); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 144f2291651cc..b450d8263c30b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -47,7 +47,7 @@ static APSInt getAPSIntParam(const InterpFrame *Frame, unsigned Index) { return R; } -PrimType getIntPrimType(const InterpState &S) { +static PrimType getIntPrimType(const InterpState &S) { const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned IntWidth = TI.getIntWidth(); @@ -58,7 +58,7 @@ PrimType getIntPrimType(const InterpState &S) { llvm_unreachable("Int isn't 16 or 32 bit?"); } -PrimType getLongPrimType(const InterpState &S) { +static PrimType getLongPrimType(const InterpState &S) { const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned LongWidth = TI.getLongWidth(); diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 7f02464a1c0f1..20f67d9b1fd42 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -234,7 +234,12 @@ SourceInfo InterpFrame::getSource(CodePtr PC) const { if (Func && !funcHasUsableBody(Func) && Caller) return Caller->getSource(RetPC); - return S.getSource(Func, PC); + // Similarly, if the resulting source location is invalid anyway, + // point to the caller instead. 
+ SourceInfo Result = S.getSource(Func, PC); + if (Result.getLoc().isInvalid() && Caller) + return Caller->getSource(RetPC); + return Result; } const Expr *InterpFrame::getExpr(CodePtr PC) const { diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index 590ee19de6d2f..c98a3506b0a90 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -158,7 +158,7 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) { if (const auto *E = D.dyn_cast()) { QT = E->getType(); } else { - const ValueDecl *VD = cast(D.get()); + const ValueDecl *VD = cast(cast(D)); IsWeak = VD->isWeak(); QT = VD->getType(); if (const auto *RT = QT->getAs()) diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index e37ebec085195..07c4419e3cf40 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -388,9 +388,8 @@ ExprDependence clang::computeDependence(PackIndexingExpr *E) { ExprDependence::Instantiation; ArrayRef Exprs = E->getExpressions(); - if (Exprs.empty()) + if (Exprs.empty() || !E->isFullySubstituted()) D |= PatternDep | ExprDependence::Instantiation; - else if (!E->getIndexExpr()->isInstantiationDependent()) { std::optional Index = E->getSelectedIndex(); assert(Index && *Index < Exprs.size() && "pack index out of bound"); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index bfeb4827f7958..741e908cf9bc5 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1991,7 +1991,7 @@ void DeclaratorDecl::setQualifierInfo(NestedNameSpecifierLoc QualifierLoc) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. 
@@ -2010,7 +2010,7 @@ void DeclaratorDecl::setTrailingRequiresClause(Expr *TrailingRequiresClause) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2026,7 +2026,7 @@ void DeclaratorDecl::setTemplateParameterListsInfo( // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2534,7 +2534,7 @@ EvaluatedStmt *VarDecl::ensureEvaluatedStmt() const { // work to avoid leaking those, but we do so in VarDecl::evaluateValue // where we can detect whether there's anything to clean up or not. 
Eval = new (getASTContext()) EvaluatedStmt; - Eval->Value = Init.get(); + Eval->Value = cast(Init); Init = Eval; } return Eval; @@ -3017,7 +3017,7 @@ void ParmVarDecl::setUninstantiatedDefaultArg(Expr *arg) { Expr *ParmVarDecl::getUninstantiatedDefaultArg() { assert(hasUninstantiatedDefaultArg() && "Wrong kind of initialization expression!"); - return cast_if_present(Init.get()); + return cast_if_present(cast(Init)); } bool ParmVarDecl::hasDefaultArg() const { @@ -4010,12 +4010,12 @@ FunctionDecl::TemplatedKind FunctionDecl::getTemplatedKind() const { "No other valid types in NamedDecl"); return TK_FunctionTemplate; } - if (TemplateOrSpecialization.is()) + if (isa(TemplateOrSpecialization)) return TK_MemberSpecialization; - if (TemplateOrSpecialization.is()) + if (isa(TemplateOrSpecialization)) return TK_FunctionTemplateSpecialization; - if (TemplateOrSpecialization.is - ()) + if (isa( + TemplateOrSpecialization)) return TK_DependentFunctionTemplateSpecialization; llvm_unreachable("Did we miss a TemplateOrSpecialization type?"); @@ -4062,9 +4062,9 @@ void FunctionDecl::setDescribedFunctionTemplate( } bool FunctionDecl::isFunctionTemplateSpecialization() const { - return TemplateOrSpecialization.is() || - TemplateOrSpecialization - .is(); + return isa(TemplateOrSpecialization) || + isa( + TemplateOrSpecialization); } void FunctionDecl::setInstantiatedFromDecl(FunctionDecl *FD) { @@ -4216,7 +4216,7 @@ void FunctionDecl::setFunctionTemplateSpecialization( const TemplateArgumentListInfo *TemplateArgsAsWritten, SourceLocation PointOfInstantiation) { assert((TemplateOrSpecialization.isNull() || - TemplateOrSpecialization.is()) && + isa(TemplateOrSpecialization)) && "Member function is already a specialization"); assert(TSK != TSK_Undeclared && "Must specify the type of function template specialization"); @@ -4287,8 +4287,8 @@ TemplateSpecializationKind FunctionDecl::getTemplateSpecializationKind() const { // A dependent function template specialization is an explicit 
specialization, // except when it's a friend declaration. - if (TemplateOrSpecialization - .is() && + if (isa( + TemplateOrSpecialization) && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; @@ -4331,8 +4331,8 @@ FunctionDecl::getTemplateSpecializationKindForInstantiation() const { TemplateOrSpecialization.dyn_cast()) return MSInfo->getTemplateSpecializationKind(); - if (TemplateOrSpecialization - .is() && + if (isa( + TemplateOrSpecialization) && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 96638b85c452b..fb701f76231bc 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1500,7 +1500,8 @@ DeclContext *DeclContext::getPrimaryContext() { } template -void collectAllContextsImpl(T *Self, SmallVectorImpl &Contexts) { +static void collectAllContextsImpl(T *Self, + SmallVectorImpl &Contexts) { for (T *D = Self->getMostRecentDecl(); D; D = D->getPreviousDecl()) Contexts.push_back(D); diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 39c548e9c2253..25560faae8672 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2733,14 +2733,14 @@ int64_t CXXCtorInitializer::getID(const ASTContext &Context) const { TypeLoc CXXCtorInitializer::getBaseClassLoc() const { if (isBaseInitializer()) - return Initializee.get()->getTypeLoc(); + return cast(Initializee)->getTypeLoc(); else return {}; } const Type *CXXCtorInitializer::getBaseClass() const { if (isBaseInitializer()) - return Initializee.get()->getType().getTypePtr(); + return cast(Initializee)->getType().getTypePtr(); else return nullptr; } @@ -2752,7 +2752,7 @@ SourceLocation CXXCtorInitializer::getSourceLocation() const { if (isAnyMemberInitializer()) return getMemberLocation(); - if (const auto *TSInfo = Initializee.get()) + if (const auto *TSInfo = cast(Initializee)) return TSInfo->getTypeLoc().getBeginLoc(); return {}; diff --git 
a/clang/lib/AST/DeclFriend.cpp b/clang/lib/AST/DeclFriend.cpp index d003842bfb7c7..6bfc2eb62b284 100644 --- a/clang/lib/AST/DeclFriend.cpp +++ b/clang/lib/AST/DeclFriend.cpp @@ -36,8 +36,7 @@ FriendDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, SourceLocation EllipsisLoc, ArrayRef FriendTypeTPLists) { #ifndef NDEBUG - if (Friend.is()) { - const auto *D = Friend.get(); + if (const auto *D = dyn_cast(Friend)) { assert(isa(D) || isa(D) || isa(D) || diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index f487032a37ab7..1da3f26bf23cd 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -992,7 +992,7 @@ ClassTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } SourceRange @@ -1008,7 +1008,7 @@ ClassTemplateSpecializationDecl::getSourceRange() const { if (const auto *CTPSD = Pattern.dyn_cast()) return CTPSD->getSourceRange(); - return Pattern.get()->getSourceRange(); + return cast(Pattern)->getSourceRange(); } case TSK_ExplicitSpecialization: { SourceRange Range = CXXRecordDecl::getSourceRange(); @@ -1404,7 +1404,7 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } SourceRange VarTemplateSpecializationDecl::getSourceRange() const { @@ -1419,7 +1419,7 @@ SourceRange VarTemplateSpecializationDecl::getSourceRange() const { if (const auto *VTPSD = Pattern.dyn_cast()) return VTPSD->getSourceRange(); - VarTemplateDecl *VTD = Pattern.get(); + VarTemplateDecl *VTD = cast(Pattern); if (hasInit()) { if (VarTemplateDecl *Definition = VTD->getDefinition()) return 
Definition->getSourceRange(); diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 0ce129de85f03..fc09d24fc30cb 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -162,7 +162,7 @@ QualType CXXTypeidExpr::getTypeOperand(const ASTContext &Context) const { assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)"); Qualifiers Quals; return Context.getUnqualifiedArrayType( - Operand.get()->getType().getNonReferenceType(), Quals); + cast(Operand)->getType().getNonReferenceType(), Quals); } static bool isGLValueFromPointerDeref(const Expr *E) { @@ -216,7 +216,7 @@ QualType CXXUuidofExpr::getTypeOperand(ASTContext &Context) const { assert(isTypeOperand() && "Cannot call getTypeOperand for __uuidof(expr)"); Qualifiers Quals; return Context.getUnqualifiedArrayType( - Operand.get()->getType().getNonReferenceType(), Quals); + cast(Operand)->getType().getNonReferenceType(), Quals); } // CXXScalarValueInitExpr @@ -1717,9 +1717,9 @@ NonTypeTemplateParmDecl *SubstNonTypeTemplateParmExpr::getParameter() const { PackIndexingExpr *PackIndexingExpr::Create( ASTContext &Context, SourceLocation EllipsisLoc, SourceLocation RSquareLoc, Expr *PackIdExpr, Expr *IndexExpr, std::optional Index, - ArrayRef SubstitutedExprs, bool ExpandedToEmptyPack) { + ArrayRef SubstitutedExprs, bool FullySubstituted) { QualType Type; - if (Index && !SubstitutedExprs.empty()) + if (Index && FullySubstituted && !SubstitutedExprs.empty()) Type = SubstitutedExprs[*Index]->getType(); else Type = Context.DependentTy; @@ -1728,7 +1728,7 @@ PackIndexingExpr *PackIndexingExpr::Create( Context.Allocate(totalSizeToAlloc(SubstitutedExprs.size())); return new (Storage) PackIndexingExpr(Type, EllipsisLoc, RSquareLoc, PackIdExpr, IndexExpr, - SubstitutedExprs, ExpandedToEmptyPack); + SubstitutedExprs, FullySubstituted); } NamedDecl *PackIndexingExpr::getPackDecl() const { @@ -1829,11 +1829,11 @@ void MaterializeTemporaryExpr::setExtendingDecl(ValueDecl *ExtendedBy, 
// We may need to allocate extra storage for the mangling number and the // extended-by ValueDecl. - if (!State.is()) + if (!isa(State)) State = LifetimeExtendedTemporaryDecl::Create( - cast(State.get()), ExtendedBy, ManglingNumber); + cast(cast(State)), ExtendedBy, ManglingNumber); - auto ES = State.get(); + auto ES = cast(State); ES->ExtendingDecl = ExtendedBy; ES->ManglingNumber = ManglingNumber; } diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp index 6efe73ea085a7..e6afcdd5dc3e8 100644 --- a/clang/lib/AST/ExprConcepts.cpp +++ b/clang/lib/AST/ExprConcepts.cpp @@ -94,8 +94,7 @@ ConceptSpecializationExpr::Create(const ASTContext &C, ConceptReference *Loc, const TypeConstraint * concepts::ExprRequirement::ReturnTypeRequirement::getTypeConstraint() const { assert(isTypeConstraint()); - auto TPL = - TypeConstraintInfo.getPointer().get(); + auto TPL = cast(TypeConstraintInfo.getPointer()); return cast(TPL->getParam(0)) ->getTypeConstraint(); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c6a210459240a..c6d003073966f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3824,8 +3824,8 @@ static QualType getSubobjectType(QualType ObjType, QualType SubobjType, } /// Find the designated sub-object of an rvalue. -template -typename SubobjectHandler::result_type +template +static typename SubobjectHandler::result_type findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, SubobjectHandler &handler) { if (Sub.Invalid) @@ -7106,7 +7106,7 @@ static std::optional CheckDeleteKind(EvalInfo &Info, const Expr *E, } // Perform a call to 'operator delete' or '__builtin_operator_delete'. 
-bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) { +static bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) { if (Info.checkingPotentialConstantExpression() || Info.SpeculativeEvaluationDepth) return false; diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp index bf152ca35431c..a1b2551419f5e 100644 --- a/clang/lib/AST/ItaniumCXXABI.cpp +++ b/clang/lib/AST/ItaniumCXXABI.cpp @@ -75,17 +75,17 @@ struct DecompositionDeclName { } namespace llvm { -template bool isDenseMapKeyEmpty(T V) { +template static bool isDenseMapKeyEmpty(T V) { return llvm::DenseMapInfo::isEqual( V, llvm::DenseMapInfo::getEmptyKey()); } -template bool isDenseMapKeyTombstone(T V) { +template static bool isDenseMapKeyTombstone(T V) { return llvm::DenseMapInfo::isEqual( V, llvm::DenseMapInfo::getTombstoneKey()); } template -std::optional areDenseMapKeysEqualSpecialValues(T LHS, T RHS) { +static std::optional areDenseMapKeysEqualSpecialValues(T LHS, T RHS) { bool LHSEmpty = isDenseMapKeyEmpty(LHS); bool RHSEmpty = isDenseMapKeyEmpty(RHS); if (LHSEmpty || RHSEmpty) diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index 9723c0cfa83bb..af7d9fcdc638b 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -50,7 +50,7 @@ DynTypedNode ParentMapContext::traverseIgnored(const DynTypedNode &N) const { } template -std::tuple +static std::tuple matchParents(const DynTypedNodeList &NodeList, ParentMapContext::ParentMap *ParentMap); @@ -107,7 +107,7 @@ class ParentMapContext::ParentMap { return DynTypedNode::create(*D); if (const auto *S = U.dyn_cast()) return DynTypedNode::create(*S); - return *U.get(); + return *cast(U); } template @@ -127,17 +127,17 @@ class ParentMapContext::ParentMap { ParentMap(ASTContext &Ctx); ~ParentMap() { for (const auto &Entry : PointerParents) { - if (Entry.second.is()) { - delete Entry.second.get(); - } else if (Entry.second.is()) { - delete 
Entry.second.get(); + if (auto *DTN = dyn_cast(Entry.second)) { + delete DTN; + } else if (auto *PV = dyn_cast(Entry.second)) { + delete PV; } } for (const auto &Entry : OtherParents) { - if (Entry.second.is()) { - delete Entry.second.get(); - } else if (Entry.second.is()) { - delete Entry.second.get(); + if (auto *DTN = dyn_cast(Entry.second)) { + delete DTN; + } else if (auto *PV = dyn_cast(Entry.second)) { + delete PV; } } } @@ -392,14 +392,14 @@ class ParentMapContext::ParentMap::ASTVisitor else NodeOrVector = new DynTypedNode(ParentStack.back()); } else { - if (!NodeOrVector.template is()) { + if (!isa(NodeOrVector)) { auto *Vector = new ParentVector( 1, getSingleDynTypedNodeFromParentMap(NodeOrVector)); delete NodeOrVector.template dyn_cast(); NodeOrVector = Vector; } - auto *Vector = NodeOrVector.template get(); + auto *Vector = cast(NodeOrVector); // Skip duplicates for types that have memoization data. // We must check that the type has memoization data before calling // llvm::is_contained() because DynTypedNode::operator== can't compare all diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp index c500507fecdf5..7d6275caedc4f 100644 --- a/clang/lib/AST/TemplateName.cpp +++ b/clang/lib/AST/TemplateName.cpp @@ -151,13 +151,13 @@ TemplateName::NameKind TemplateName::getKind() const { return Template; } - if (Storage.is()) + if (isa(Storage)) return DependentTemplate; - if (Storage.is()) + if (isa(Storage)) return QualifiedTemplate; - UncommonTemplateNameStorage *uncommon - = Storage.get(); + UncommonTemplateNameStorage *uncommon = + cast(Storage); if (uncommon->getAsOverloadedStorage()) return OverloadedTemplate; if (uncommon->getAsAssumedTemplateName()) diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index b70f86ef31442..edf20944f0b3e 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -4031,12 +4031,12 @@ void DependentDecltypeType::Profile(llvm::FoldingSetNodeID &ID, 
PackIndexingType::PackIndexingType(const ASTContext &Context, QualType Canonical, QualType Pattern, - Expr *IndexExpr, bool ExpandsToEmptyPack, + Expr *IndexExpr, bool FullySubstituted, ArrayRef Expansions) : Type(PackIndexing, Canonical, computeDependence(Pattern, IndexExpr, Expansions)), Context(Context), Pattern(Pattern), IndexExpr(IndexExpr), - Size(Expansions.size()), ExpandsToEmptyPack(ExpandsToEmptyPack) { + Size(Expansions.size()), FullySubstituted(FullySubstituted) { std::uninitialized_copy(Expansions.begin(), Expansions.end(), getTrailingObjects()); @@ -4081,10 +4081,10 @@ PackIndexingType::computeDependence(QualType Pattern, Expr *IndexExpr, void PackIndexingType::Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, QualType Pattern, - Expr *E, bool ExpandsToEmptyPack) { + Expr *E, bool FullySubstituted) { Pattern.Profile(ID); E->Profile(ID, Context, true); - ID.AddBoolean(ExpandsToEmptyPack); + ID.AddBoolean(FullySubstituted); } UnaryTransformType::UnaryTransformType(QualType BaseType, diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 5a95ef36d0502..be0e8aa5743dd 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -231,12 +231,12 @@ ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Decl *Dec) { const Stmt *ExprMutationAnalyzer::Analyzer::findMutationMemoized( const Expr *Exp, llvm::ArrayRef Finders, Memoized::ResultMap &MemoizedResults) { + // Assume Exp is not mutated before analyzing Exp. auto [Memoized, Inserted] = MemoizedResults.try_emplace(Exp); if (!Inserted) return Memoized->second; - // Assume Exp is not mutated before analyzing Exp. 
- if (isUnevaluated(Exp)) + if (ExprMutationAnalyzer::isUnevaluated(Exp, Context)) return nullptr; for (const auto &Finder : Finders) { @@ -268,41 +268,29 @@ ExprMutationAnalyzer::Analyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } -bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Stmt *Exp, - const Stmt &Stm, - ASTContext &Context) { - return selectFirst( - NodeID::value, - match( - findFirst( - stmt(canResolveToExpr(Exp), - anyOf( - // `Exp` is part of the underlying expression of - // decltype/typeof if it has an ancestor of - // typeLoc. - hasAncestor(typeLoc(unless( - hasAncestor(unaryExprOrTypeTraitExpr())))), - hasAncestor(expr(anyOf( - // `UnaryExprOrTypeTraitExpr` is unevaluated - // unless it's sizeof on VLA. - unaryExprOrTypeTraitExpr(unless(sizeOfExpr( - hasArgumentOfType(variableArrayType())))), - // `CXXTypeidExpr` is unevaluated unless it's - // applied to an expression of glvalue of - // polymorphic class type. - cxxTypeidExpr( - unless(isPotentiallyEvaluated())), - // The controlling expression of - // `GenericSelectionExpr` is unevaluated. - genericSelectionExpr(hasControllingExpr( - hasDescendant(equalsNode(Exp)))), - cxxNoexceptExpr()))))) - .bind(NodeID::value)), - Stm, Context)) != nullptr; -} - -bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Expr *Exp) { - return isUnevaluated(Exp, Stm, Context); +bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Stm, ASTContext &Context) { + return !match(stmt(anyOf( + // `Exp` is part of the underlying expression of + // decltype/typeof if it has an ancestor of + // typeLoc. + hasAncestor(typeLoc( + unless(hasAncestor(unaryExprOrTypeTraitExpr())))), + hasAncestor(expr(anyOf( + // `UnaryExprOrTypeTraitExpr` is unevaluated + // unless it's sizeof on VLA. + unaryExprOrTypeTraitExpr(unless(sizeOfExpr( + hasArgumentOfType(variableArrayType())))), + // `CXXTypeidExpr` is unevaluated unless it's + // applied to an expression of glvalue of + // polymorphic class type. 
+ cxxTypeidExpr(unless(isPotentiallyEvaluated())), + // The controlling expression of + // `GenericSelectionExpr` is unevaluated. + genericSelectionExpr( + hasControllingExpr(hasDescendant(equalsNode(Stm)))), + cxxNoexceptExpr()))))), + *Stm, Context) + .empty(); } const Stmt * diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp index 6904bce3ac51e..fa26cc584b724 100644 --- a/clang/lib/Basic/Attributes.cpp +++ b/clang/lib/Basic/Attributes.cpp @@ -156,7 +156,7 @@ std::string AttributeCommonInfo::getNormalizedFullName() const { normalizeName(getAttrName(), getScopeName(), getSyntax())); } -AttributeCommonInfo::Scope +static AttributeCommonInfo::Scope getScopeFromNormalizedScopeName(StringRef ScopeName) { return llvm::StringSwitch(ScopeName) .Case("", AttributeCommonInfo::Scope::NONE) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e9c9be907e31d..f32d5a2f43559 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -209,7 +209,7 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { return LastInst; } -Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) { +static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) { assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() && E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) && "asdouble operands types mismatch"); @@ -19028,7 +19028,7 @@ static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) { return RT.getUDotIntrinsic(); } -Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { +static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { if (QT->hasSignedIntegerRepresentation()) { return RT.getFirstBitSHighIntrinsic(); } @@ -19409,6 +19409,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.radians"); } + 
case Builtin::BI__builtin_hlsl_buffer_update_counter: { + Value *ResHandle = EmitScalarExpr(E->getArg(0)); + Value *Offset = EmitScalarExpr(E->getArg(1)); + Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true); + return Builder.CreateIntrinsic( + /*ReturnType=*/Offset->getType(), + CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(), + ArrayRef{ResHandle, OffsetI8}, nullptr); + } case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { assert((E->getArg(0)->getType()->hasFloatingRepresentation() && @@ -19697,8 +19706,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: - case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: { - + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: + case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32: + case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32: + case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32: + case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: { Intrinsic::ID IID; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: @@ -19713,6 +19725,18 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: IID = Intrinsic::amdgcn_global_load_tr_b128; break; + case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32: + IID = Intrinsic::amdgcn_ds_read_tr4_b64; + break; + case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32: + IID = Intrinsic::amdgcn_ds_read_tr8_b64; + break; + case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32: + IID = Intrinsic::amdgcn_ds_read_tr6_b96; + break; + case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: + IID = Intrinsic::amdgcn_ds_read_tr16_b64; + break; } llvm::Type *LoadTy = ConvertType(E->getType()); llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); @@ -20200,6 
+20224,32 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); return Builder.CreateCall(F, {Arg}); } + case AMDGPU::BI__builtin_amdgcn_permlane16_swap: + case AMDGPU::BI__builtin_amdgcn_permlane32_swap: { + // Because builtin types are limited, and the intrinsic uses a struct/pair + // output, marshal the pair-of-i32 to <2 x i32>. + Value *VDstOld = EmitScalarExpr(E->getArg(0)); + Value *VSrcOld = EmitScalarExpr(E->getArg(1)); + Value *FI = EmitScalarExpr(E->getArg(2)); + Value *BoundCtrl = EmitScalarExpr(E->getArg(3)); + Function *F = + CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap + ? Intrinsic::amdgcn_permlane16_swap + : Intrinsic::amdgcn_permlane32_swap); + llvm::CallInst *Call = + Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl}); + + llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0); + llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1); + + llvm::Type *ResultType = ConvertType(E->getType()); + + llvm::Value *Insert0 = Builder.CreateInsertElement( + llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0)); + llvm::Value *AsVector = + Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1)); + return AsVector; + } case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: return emitBuiltinWithOneOverloadedType<4>( *this, E, Intrinsic::amdgcn_make_buffer_rsrc); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index a8e0ed42b79a3..854214d6bc067 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -102,6 +102,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding) + GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, bufferUpdateCounter) //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. 
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index b854eeb62a80c..716c43431667c 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2047,6 +2047,15 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel), ND)); + // This invariant should hold true in the future. + // Prior work: + // https://discourse.llvm.org/t/rfc-clang-diagnostic-for-demangling-failures/82835/8 + // https://github.com/llvm/llvm-project/issues/111345 + // assert((MangledName.startswith("_Z") || MangledName.startswith("?")) && + // !GD->hasAttr() && + // llvm::demangle(MangledName) != MangledName && + // "LLVM demangler must demangle clang-generated names"); + auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } @@ -4553,6 +4562,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { ResolverName += ".resolver"; } + bool ShouldReturnIFunc = + getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion(); + // If the resolver has already been created, just return it. This lookup may // yield a function declaration instead of a resolver on AArch64. That is // because we didn't know whether a resolver will be generated when we first @@ -4560,8 +4572,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { // targets which support ifuncs should not return here unless we actually // found an ifunc. 
llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName); - if (ResolverGV && - (isa(ResolverGV) || !getTarget().supportsIFunc())) + if (ResolverGV && (isa(ResolverGV) || !ShouldReturnIFunc)) return ResolverGV; const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); @@ -4574,7 +4585,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { // For cpu_specific, don't create an ifunc yet because we don't know if the // cpu_dispatch will be emitted in this translation unit. - if (getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion()) { + if (ShouldReturnIFunc) { unsigned AS = getTypes().getTargetAddressSpace(FD->getType()); llvm::Type *ResolverType = llvm::FunctionType::get(llvm::PointerType::get(DeclTy, AS), false); @@ -4593,11 +4604,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { llvm::Constant *Resolver = GetOrCreateLLVMFunction( ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); - assert(isa(Resolver) && + assert(isa(Resolver) && !ResolverGV && "Resolver should be created for the first time"); SetCommonAttributes(FD, cast(Resolver)); - if (ResolverGV) - replaceDeclarationWith(ResolverGV, Resolver); return Resolver; } diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 29781399cbab4..383dc8387e75e 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -378,9 +378,9 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, if (NeedsXRayDeps) linkXRayRuntimeDeps(HTC, Args, CmdArgs); - CmdArgs.push_back("-lclang_rt.builtins-hexagon"); if (!Args.hasArg(options::OPT_nolibc)) CmdArgs.push_back("-lc"); + CmdArgs.push_back("-lclang_rt.builtins-hexagon"); } if (D.CCCIsCXX()) { if (HTC.ShouldLinkCXXStdlib(Args)) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index fd53969e4b3b3..aed86c1fb9955 100644 --- 
a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -693,17 +693,14 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, bool DisallowLineBreaksOnThisLine = Style.LambdaBodyIndentation == FormatStyle::LBI_Signature && - Style.isCpp() && [&Current] { - // Deal with lambda arguments in C++. The aim here is to ensure that we - // don't over-indent lambda function bodies when lambdas are passed as - // arguments to function calls. We do this by ensuring that either all - // arguments (including any lambdas) go on the same line as the function - // call, or we break before the first argument. - const auto *Prev = Current.Previous; - if (!Prev) - return false; + // Deal with lambda arguments in C++. The aim here is to ensure that we + // don't over-indent lambda function bodies when lambdas are passed as + // arguments to function calls. We do this by ensuring that either all + // arguments (including any lambdas) go on the same line as the function + // call, or we break before the first argument. + Style.isCpp() && [&] { // For example, `/*Newline=*/false`. 
- if (Prev->is(TT_BlockComment) && Current.SpacesRequiredBefore == 0) + if (Previous.is(TT_BlockComment) && Current.SpacesRequiredBefore == 0) return false; const auto *PrevNonComment = Current.getPreviousNonComment(); if (!PrevNonComment || PrevNonComment->isNot(tok::l_paren)) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 0cf4cdbeab31f..ee52972ce66f4 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2179,7 +2179,8 @@ class ParensRemover : public TokenAnalyzer { tooling::Replacements &Result) { const auto &SourceMgr = Env.getSourceManager(); for (auto *Line : Lines) { - removeParens(Line->Children, Result); + if (!Line->Children.empty()) + removeParens(Line->Children, Result); if (!Line->Affected) continue; for (const auto *Token = Line->First; Token && !Token->Finalized; @@ -2224,7 +2225,8 @@ class BracesInserter : public TokenAnalyzer { const auto &SourceMgr = Env.getSourceManager(); int OpeningBraceSurplus = 0; for (AnnotatedLine *Line : Lines) { - insertBraces(Line->Children, Result); + if (!Line->Children.empty()) + insertBraces(Line->Children, Result); if (!Line->Affected && OpeningBraceSurplus == 0) continue; for (FormatToken *Token = Line->First; Token && !Token->Finalized; @@ -2275,20 +2277,21 @@ class BracesRemover : public TokenAnalyzer { void removeBraces(SmallVectorImpl &Lines, tooling::Replacements &Result) { const auto &SourceMgr = Env.getSourceManager(); - const auto End = Lines.end(); - for (auto I = Lines.begin(); I != End; ++I) { - const auto Line = *I; - removeBraces(Line->Children, Result); + const auto *End = Lines.end(); + for (const auto *I = Lines.begin(); I != End; ++I) { + const auto &Line = *I; + if (!Line->Children.empty()) + removeBraces(Line->Children, Result); if (!Line->Affected) continue; - const auto NextLine = I + 1 == End ? nullptr : I[1]; - for (auto Token = Line->First; Token && !Token->Finalized; + const auto *NextLine = I + 1 == End ? 
nullptr : I[1]; + for (const auto *Token = Line->First; Token && !Token->Finalized; Token = Token->Next) { if (!Token->Optional) continue; if (!Token->isOneOf(tok::l_brace, tok::r_brace)) continue; - auto Next = Token->Next; + auto *Next = Token->Next; assert(Next || Token == Line->Last); if (!Next && NextLine) Next = NextLine->First; @@ -2299,7 +2302,7 @@ class BracesRemover : public TokenAnalyzer { } else { Start = Token->WhitespaceRange.getBegin(); } - const auto Range = + const auto &Range = CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); cantFail(Result.add(tooling::Replacement(SourceMgr, Range, ""))); } @@ -2334,21 +2337,22 @@ class SemiRemover : public TokenAnalyzer { return LBrace && LBrace->is(TT_FunctionLBrace); }; const auto &SourceMgr = Env.getSourceManager(); - const auto End = Lines.end(); - for (auto I = Lines.begin(); I != End; ++I) { - const auto Line = *I; - removeSemi(Annotator, Line->Children, Result); + const auto *End = Lines.end(); + for (const auto *I = Lines.begin(); I != End; ++I) { + const auto &Line = *I; + if (!Line->Children.empty()) + removeSemi(Annotator, Line->Children, Result); if (!Line->Affected) continue; Annotator.calculateFormattingInformation(*Line); - const auto NextLine = I + 1 == End ? nullptr : I[1]; - for (auto Token = Line->First; Token && !Token->Finalized; + const auto *NextLine = I + 1 == End ? 
nullptr : I[1]; + for (const auto *Token = Line->First; Token && !Token->Finalized; Token = Token->Next) { if (Token->isNot(tok::semi) || (!Token->Optional && !PrecededByFunctionRBrace(*Token))) { continue; } - auto Next = Token->Next; + auto *Next = Token->Next; assert(Next || Token == Line->Last); if (!Next && NextLine) Next = NextLine->First; @@ -2359,7 +2363,7 @@ class SemiRemover : public TokenAnalyzer { } else { Start = Token->WhitespaceRange.getBegin(); } - const auto Range = + const auto &Range = CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); cantFail(Result.add(tooling::Replacement(SourceMgr, Range, ""))); } diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 9a0fdb175ff29..9b611bfcc9e63 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1103,7 +1103,15 @@ static void InitializePredefinedMacros(const TargetInfo &TI, assert(TI.getCharWidth() == 8 && "Only support 8-bit char so far"); Builder.defineMacro("__CHAR_BIT__", Twine(TI.getCharWidth())); - Builder.defineMacro("__BOOL_WIDTH__", Twine(TI.getBoolWidth())); + // The macro is specifying the number of bits in the width, not the number of + // bits the object requires for its in-memory representation, which is what + // getBoolWidth() will return. The bool/_Bool data type is only ever one bit + // wide. See C23 6.2.6.2p2 for the rules in C. Note that + // C++23 [basic.fundamental]p10 allows an implementation-defined value + // representation for bool; when lowering to LLVM, Clang represents bool as an + // i8 in memory but as an i1 when the value is needed, so '1' is also correct + // for C++. 
+ Builder.defineMacro("__BOOL_WIDTH__", "1"); Builder.defineMacro("__SHRT_WIDTH__", Twine(TI.getShortWidth())); Builder.defineMacro("__INT_WIDTH__", Twine(TI.getIntWidth())); Builder.defineMacro("__LONG_WIDTH__", Twine(TI.getLongWidth())); diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index f6d787a0c8831..cd4504630f871 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -799,7 +799,7 @@ StmtResult Parser::ParseLabeledStatement(ParsedAttributes &Attrs, } // If we've not parsed a statement yet, parse one now. - if (!SubStmt.isInvalid() && !SubStmt.isUsable()) + if (SubStmt.isUnset()) SubStmt = ParseStatement(nullptr, StmtCtx); // Broken substmt shouldn't prevent the label from being added to the AST. diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 822202fd81dc8..886a4c098580a 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -12,7 +12,9 @@ #include "clang/Sema/HLSLExternalSemaSource.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" #include "clang/Sema/Lookup.h" @@ -20,36 +22,43 @@ #include "clang/Sema/SemaHLSL.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/Support/ErrorHandling.h" #include using namespace clang; using namespace llvm::hlsl; +static FunctionDecl *lookupBuiltinFunction(Sema &S, StringRef Name); + namespace { struct TemplateParameterListBuilder; struct BuiltinTypeDeclBuilder { + Sema &SemaRef; CXXRecordDecl *Record = nullptr; ClassTemplateDecl *Template = nullptr; ClassTemplateDecl *PrevTemplate = nullptr; NamespaceDecl *HLSLNamespace = nullptr; llvm::StringMap Fields; - BuiltinTypeDeclBuilder(CXXRecordDecl *R) : Record(R) { + BuiltinTypeDeclBuilder(Sema &SemaRef, 
CXXRecordDecl *R) + : SemaRef(SemaRef), Record(R) { Record->startDefinition(); Template = Record->getDescribedClassTemplate(); } - BuiltinTypeDeclBuilder(Sema &S, NamespaceDecl *Namespace, StringRef Name) - : HLSLNamespace(Namespace) { - ASTContext &AST = S.getASTContext(); + BuiltinTypeDeclBuilder(Sema &SemaRef, NamespaceDecl *Namespace, + StringRef Name) + : SemaRef(SemaRef), HLSLNamespace(Namespace) { + ASTContext &AST = SemaRef.getASTContext(); IdentifierInfo &II = AST.Idents.get(Name, tok::TokenKind::identifier); - LookupResult Result(S, &II, SourceLocation(), Sema::LookupTagName); + LookupResult Result(SemaRef, &II, SourceLocation(), Sema::LookupTagName); CXXRecordDecl *PrevDecl = nullptr; - if (S.LookupQualifiedName(Result, HLSLNamespace)) { + if (SemaRef.LookupQualifiedName(Result, HLSLNamespace)) { + // Declaration already exists (from precompiled headers) NamedDecl *Found = Result.getFoundDecl(); if (auto *TD = dyn_cast(Found)) { PrevDecl = TD->getTemplatedDecl(); @@ -61,6 +70,7 @@ struct BuiltinTypeDeclBuilder { if (PrevDecl && PrevDecl->isCompleteDefinition()) { Record = PrevDecl; + Template = PrevTemplate; return; } @@ -84,8 +94,7 @@ struct BuiltinTypeDeclBuilder { BuiltinTypeDeclBuilder & addMemberVariable(StringRef Name, QualType Type, llvm::ArrayRef Attrs, AccessSpecifier Access = AccessSpecifier::AS_private) { - if (Record->isCompleteDefinition()) - return *this; + assert(!Record->isCompleteDefinition() && "record is already complete"); assert(Record->isBeingDefined() && "Definition must be started before adding members!"); ASTContext &AST = Record->getASTContext(); @@ -109,22 +118,16 @@ struct BuiltinTypeDeclBuilder { } BuiltinTypeDeclBuilder & - addHandleMember(Sema &S, ResourceClass RC, ResourceKind RK, bool IsROV, - bool RawBuffer, + addHandleMember(ResourceClass RC, ResourceKind RK, bool IsROV, bool RawBuffer, AccessSpecifier Access = AccessSpecifier::AS_private) { - if (Record->isCompleteDefinition()) - return *this; + 
assert(!Record->isCompleteDefinition() && "record is already complete"); - ASTContext &Ctx = S.getASTContext(); + ASTContext &Ctx = SemaRef.getASTContext(); TypeSourceInfo *ElementTypeInfo = nullptr; QualType ElemTy = Ctx.Char8Ty; - if (Template) { - if (const auto *TTD = dyn_cast( - Template->getTemplateParameters()->getParam(0))) { - ElemTy = QualType(TTD->getTypeForDecl(), 0); - } - } + if (Template) + ElemTy = getFirstTemplateTypeParam(); ElementTypeInfo = Ctx.getTrivialTypeSourceInfo(ElemTy, SourceLocation()); // add handle member with resource type attributes @@ -137,32 +140,13 @@ struct BuiltinTypeDeclBuilder { ? HLSLContainedTypeAttr::CreateImplicit(Ctx, ElementTypeInfo) : nullptr}; Attr *ResourceAttr = HLSLResourceAttr::CreateImplicit(Ctx, RK); - if (CreateHLSLAttributedResourceType(S, Ctx.HLSLResourceTy, Attrs, + if (CreateHLSLAttributedResourceType(SemaRef, Ctx.HLSLResourceTy, Attrs, AttributedResTy)) addMemberVariable("__handle", AttributedResTy, {ResourceAttr}, Access); return *this; } - static DeclRefExpr *lookupBuiltinFunction(ASTContext &AST, Sema &S, - StringRef Name) { - IdentifierInfo &II = AST.Idents.get(Name, tok::TokenKind::identifier); - DeclarationNameInfo NameInfo = - DeclarationNameInfo(DeclarationName(&II), SourceLocation()); - LookupResult R(S, NameInfo, Sema::LookupOrdinaryName); - // AllowBuiltinCreation is false but LookupDirect will create - // the builtin when searching the global scope anyways... - S.LookupName(R, S.getCurScope()); - // FIXME: If the builtin function was user-declared in global scope, - // this assert *will* fail. Should this call LookupBuiltin instead? 
- assert(R.isSingleResult() && - "Since this is a builtin it should always resolve!"); - auto *VD = cast(R.getFoundDecl()); - QualType Ty = VD->getType(); - return DeclRefExpr::Create(AST, NestedNameSpecifierLoc(), SourceLocation(), - VD, false, NameInfo, Ty, VK_PRValue); - } - - BuiltinTypeDeclBuilder &addDefaultHandleConstructor(Sema &S) { + BuiltinTypeDeclBuilder &addDefaultHandleConstructor() { if (Record->isCompleteDefinition()) return *this; ASTContext &AST = Record->getASTContext(); @@ -187,25 +171,18 @@ struct BuiltinTypeDeclBuilder { } BuiltinTypeDeclBuilder &addArraySubscriptOperators() { - if (Record->isCompleteDefinition()) - return *this; addArraySubscriptOperator(true); addArraySubscriptOperator(false); return *this; } BuiltinTypeDeclBuilder &addArraySubscriptOperator(bool IsConst) { - if (Record->isCompleteDefinition()) - return *this; + assert(!Record->isCompleteDefinition() && "record is already complete"); ASTContext &AST = Record->getASTContext(); QualType ElemTy = AST.Char8Ty; - if (Template) { - if (const auto *TTD = dyn_cast( - Template->getTemplateParameters()->getParam(0))) { - ElemTy = QualType(TTD->getTypeForDecl(), 0); - } - } + if (Template) + ElemTy = getFirstTemplateTypeParam(); QualType ReturnTy = ElemTy; FunctionProtoType::ExtProtoInfo ExtInfo; @@ -271,16 +248,31 @@ struct BuiltinTypeDeclBuilder { return *this; } + FieldDecl *getResourceHandleField() { + auto I = Fields.find("__handle"); + assert(I != Fields.end() && + I->second->getType()->isHLSLAttributedResourceType() && + "record does not have resource handle field"); + return I->second; + } + + QualType getFirstTemplateTypeParam() { + assert(Template && "record it not a template"); + if (const auto *TTD = dyn_cast( + Template->getTemplateParameters()->getParam(0))) { + return QualType(TTD->getTypeForDecl(), 0); + } + return QualType(); + } + BuiltinTypeDeclBuilder &startDefinition() { - if (Record->isCompleteDefinition()) - return *this; + assert(!Record->isCompleteDefinition() 
&& "record is already complete"); Record->startDefinition(); return *this; } BuiltinTypeDeclBuilder &completeDefinition() { - if (Record->isCompleteDefinition()) - return *this; + assert(!Record->isCompleteDefinition() && "record is already complete"); assert(Record->isBeingDefined() && "Definition must be started before completing it."); @@ -288,38 +280,47 @@ struct BuiltinTypeDeclBuilder { return *this; } - TemplateParameterListBuilder addTemplateArgumentList(Sema &S); - BuiltinTypeDeclBuilder & - addSimpleTemplateParams(Sema &S, ArrayRef Names, ConceptDecl *CD); - BuiltinTypeDeclBuilder &addConceptSpecializationExpr(Sema &S); + Expr *getConstantIntExpr(int value) { + ASTContext &AST = SemaRef.getASTContext(); + return IntegerLiteral::Create( + AST, llvm::APInt(AST.getTypeSize(AST.IntTy), value, true), AST.IntTy, + SourceLocation()); + } + + TemplateParameterListBuilder addTemplateArgumentList(); + BuiltinTypeDeclBuilder &addSimpleTemplateParams(ArrayRef Names, + ConceptDecl *CD); + + // Builtin types methods + BuiltinTypeDeclBuilder &addIncrementCounterMethod(); + BuiltinTypeDeclBuilder &addDecrementCounterMethod(); }; struct TemplateParameterListBuilder { BuiltinTypeDeclBuilder &Builder; - Sema &S; llvm::SmallVector Params; - TemplateParameterListBuilder(Sema &S, BuiltinTypeDeclBuilder &RB) - : Builder(RB), S(S) {} + TemplateParameterListBuilder(BuiltinTypeDeclBuilder &RB) : Builder(RB) {} ~TemplateParameterListBuilder() { finalizeTemplateArgs(); } TemplateParameterListBuilder & addTypeParameter(StringRef Name, QualType DefaultValue = QualType()) { - if (Builder.Record->isCompleteDefinition()) - return *this; + assert(!Builder.Record->isCompleteDefinition() && + "record is already complete"); + ASTContext &AST = Builder.SemaRef.getASTContext(); unsigned Position = static_cast(Params.size()); auto *Decl = TemplateTypeParmDecl::Create( - S.Context, Builder.Record->getDeclContext(), SourceLocation(), + AST, Builder.Record->getDeclContext(), SourceLocation(), 
SourceLocation(), /* TemplateDepth */ 0, Position, - &S.Context.Idents.get(Name, tok::TokenKind::identifier), + &AST.Idents.get(Name, tok::TokenKind::identifier), /* Typename */ true, /* ParameterPack */ false, /* HasTypeConstraint*/ false); if (!DefaultValue.isNull()) - Decl->setDefaultArgument( - S.Context, S.getTrivialTemplateArgumentLoc(DefaultValue, QualType(), - SourceLocation())); + Decl->setDefaultArgument(AST, + Builder.SemaRef.getTrivialTemplateArgumentLoc( + DefaultValue, QualType(), SourceLocation())); Params.emplace_back(Decl); return *this; @@ -421,14 +422,14 @@ struct TemplateParameterListBuilder { BuiltinTypeDeclBuilder &finalizeTemplateArgs(ConceptDecl *CD = nullptr) { if (Params.empty()) return Builder; - ConceptSpecializationExpr *CSE = - CD ? constructConceptSpecializationExpr(S, CD) : nullptr; - auto *ParamList = TemplateParameterList::Create(S.Context, SourceLocation(), - SourceLocation(), Params, - SourceLocation(), CSE); + ASTContext &AST = Builder.SemaRef.Context; + ConceptSpecializationExpr *CSE = + CD ? constructConceptSpecializationExpr(Builder.SemaRef, CD) : nullptr; + auto *ParamList = TemplateParameterList::Create( + AST, SourceLocation(), SourceLocation(), Params, SourceLocation(), CSE); Builder.Template = ClassTemplateDecl::Create( - S.Context, Builder.Record->getDeclContext(), SourceLocation(), + AST, Builder.Record->getDeclContext(), SourceLocation(), DeclarationName(Builder.Record->getIdentifier()), ParamList, Builder.Record); @@ -443,26 +444,233 @@ struct TemplateParameterListBuilder { Params.clear(); QualType T = Builder.Template->getInjectedClassNameSpecialization(); - T = S.Context.getInjectedClassNameType(Builder.Record, T); + T = AST.getInjectedClassNameType(Builder.Record, T); return Builder; } }; + +// Builder for methods of builtin types. 
Allows adding methods to builtin types +// using the builder pattern like this: +// +// BuiltinTypeMethodBuilder(Sema, RecordBuilder, "MethodName", ReturnType) +// .addParam("param_name", Type, InOutModifier) +// .callBuiltin("builtin_name", { BuiltinParams }) +// .finalizeMethod(); +// +// The builder needs to have all of the method parameters before it can create +// a CXXMethodDecl. It collects them in addParam calls and when the first +// method that builds the body is called or when access to 'this' is needed it +// creates the CXXMethodDecl and ParmVarDecls instances. These can then be +// referenced from the body building methods. Destructor or an explicit call to +// finalizeMethod() will complete the method definition. +// +// The callBuiltin helper method passes in the resource handle as the first +// argument of the builtin call. If this is not desired it takes a bool flag to +// disable this. +// +// If the method that is being built has a non-void return type the +// finalizeMethod will create a return statement with the value of the last +// statement (unless the last statement is already a ReturnStmt).
+struct BuiltinTypeMethodBuilder { + struct MethodParam { + const IdentifierInfo &NameII; + QualType Ty; + HLSLParamModifierAttr::Spelling Modifier; + MethodParam(const IdentifierInfo &NameII, QualType Ty, + HLSLParamModifierAttr::Spelling Modifier) + : NameII(NameII), Ty(Ty), Modifier(Modifier) {} + }; + + BuiltinTypeDeclBuilder &DeclBuilder; + DeclarationNameInfo NameInfo; + QualType ReturnTy; + CXXMethodDecl *Method; + llvm::SmallVector Params; + llvm::SmallVector StmtsList; + +public: + BuiltinTypeMethodBuilder(Sema &S, BuiltinTypeDeclBuilder &DB, StringRef Name, + QualType ReturnTy) + : DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr) { + const IdentifierInfo &II = + S.getASTContext().Idents.get(Name, tok::TokenKind::identifier); + NameInfo = DeclarationNameInfo(DeclarationName(&II), SourceLocation()); + } + + BuiltinTypeMethodBuilder &addParam(StringRef Name, QualType Ty, + HLSLParamModifierAttr::Spelling Modifier = + HLSLParamModifierAttr::Keyword_in) { + assert(Method == nullptr && "Cannot add param, method already created"); + llvm_unreachable("not yet implemented"); + } + +private: + void createMethodDecl() { + assert(Method == nullptr && "Method already created"); + + // create method type + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + SmallVector ParamTypes; + for (MethodParam &MP : Params) + ParamTypes.emplace_back(MP.Ty); + QualType MethodTy = AST.getFunctionType(ReturnTy, ParamTypes, + FunctionProtoType::ExtProtoInfo()); + + // create method decl + auto *TSInfo = AST.getTrivialTypeSourceInfo(MethodTy, SourceLocation()); + Method = + CXXMethodDecl::Create(AST, DeclBuilder.Record, SourceLocation(), + NameInfo, MethodTy, TSInfo, SC_None, false, false, + ConstexprSpecKind::Unspecified, SourceLocation()); + + // create params & set them to the function prototype + SmallVector ParmDecls; + auto FnProtoLoc = + Method->getTypeSourceInfo()->getTypeLoc().getAs(); + for (int I = 0, E = Params.size(); I != E; I++) { + MethodParam &MP = Params[I]; 
+ ParmVarDecl *Parm = ParmVarDecl::Create( + AST, Method->getDeclContext(), SourceLocation(), SourceLocation(), + &MP.NameII, MP.Ty, + AST.getTrivialTypeSourceInfo(MP.Ty, SourceLocation()), SC_None, + nullptr); + if (MP.Modifier != HLSLParamModifierAttr::Keyword_in) { + auto *Mod = + HLSLParamModifierAttr::Create(AST, SourceRange(), MP.Modifier); + Parm->addAttr(Mod); + } + ParmDecls.push_back(Parm); + FnProtoLoc.setParam(I, Parm); + } + Method->setParams({ParmDecls}); + } + +public: + ~BuiltinTypeMethodBuilder() { finalizeMethod(); } + + Expr *getResourceHandleExpr() { + // The first statement added to a method or access to 'this' creates the + // declaration. + if (!Method) + createMethodDecl(); + + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + CXXThisExpr *This = CXXThisExpr::Create( + AST, SourceLocation(), Method->getFunctionObjectParameterType(), true); + FieldDecl *HandleField = DeclBuilder.getResourceHandleField(); + return MemberExpr::CreateImplicit(AST, This, false, HandleField, + HandleField->getType(), VK_LValue, + OK_Ordinary); + } + + BuiltinTypeMethodBuilder & + callBuiltin(StringRef BuiltinName, ArrayRef CallParms, + bool AddResourceHandleAsFirstArg = true) { + + // The first statement added to a method or access to 'this` creates the + // declaration. + if (!Method) + createMethodDecl(); + + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + FunctionDecl *FD = lookupBuiltinFunction(DeclBuilder.SemaRef, BuiltinName); + DeclRefExpr *DRE = DeclRefExpr::Create( + AST, NestedNameSpecifierLoc(), SourceLocation(), FD, false, + FD->getNameInfo(), FD->getType(), VK_PRValue); + + SmallVector NewCallParms; + if (AddResourceHandleAsFirstArg) { + NewCallParms.push_back(getResourceHandleExpr()); + for (auto *P : CallParms) + NewCallParms.push_back(P); + } + + Expr *Call = CallExpr::Create( + AST, DRE, AddResourceHandleAsFirstArg ? 
NewCallParms : CallParms, + FD->getReturnType(), VK_PRValue, SourceLocation(), FPOptionsOverride()); + StmtsList.push_back(Call); + return *this; + } + + BuiltinTypeDeclBuilder &finalizeMethod() { + assert(!DeclBuilder.Record->isCompleteDefinition() && + "record is already complete"); + assert( + Method != nullptr && + "method decl not created; are you missing a call to build the body?"); + + if (!Method->hasBody()) { + ASTContext &AST = DeclBuilder.SemaRef.getASTContext(); + assert((ReturnTy == AST.VoidTy || !StmtsList.empty()) && + "nothing to return from non-void method"); + if (ReturnTy != AST.VoidTy) { + if (Expr *LastExpr = dyn_cast(StmtsList.back())) { + assert(AST.hasSameUnqualifiedType( + isa(LastExpr) + ? cast(LastExpr)->getCallReturnType(AST) + : LastExpr->getType(), + ReturnTy) && + "Return type of the last statement must match the return type " + "of the method"); + if (!isa(LastExpr)) { + StmtsList.pop_back(); + StmtsList.push_back( + ReturnStmt::Create(AST, SourceLocation(), LastExpr, nullptr)); + } + } + } + + Method->setBody(CompoundStmt::Create(AST, StmtsList, FPOptionsOverride(), + SourceLocation(), SourceLocation())); + Method->setLexicalDeclContext(DeclBuilder.Record); + Method->setAccess(AccessSpecifier::AS_public); + Method->addAttr(AlwaysInlineAttr::CreateImplicit( + AST, SourceRange(), AlwaysInlineAttr::CXX11_clang_always_inline)); + DeclBuilder.Record->addDecl(Method); + } + return DeclBuilder; + } +}; + } // namespace -TemplateParameterListBuilder -BuiltinTypeDeclBuilder::addTemplateArgumentList(Sema &S) { - return TemplateParameterListBuilder(S, *this); +TemplateParameterListBuilder BuiltinTypeDeclBuilder::addTemplateArgumentList() { + return TemplateParameterListBuilder(*this); } -BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addSimpleTemplateParams( - Sema &S, ArrayRef Names, ConceptDecl *CD = nullptr) { - TemplateParameterListBuilder Builder = this->addTemplateArgumentList(S); +BuiltinTypeDeclBuilder & 
+BuiltinTypeDeclBuilder::addSimpleTemplateParams(ArrayRef Names, + ConceptDecl *CD = nullptr) { + if (Record->isCompleteDefinition()) { + assert(Template && "existing record it not a template"); + assert(Template->getTemplateParameters()->size() == Names.size() && + "template param count mismatch"); + return *this; + } + + TemplateParameterListBuilder Builder = this->addTemplateArgumentList(); for (StringRef Name : Names) Builder.addTypeParameter(Name); return Builder.finalizeTemplateArgs(CD); } +BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addIncrementCounterMethod() { + return BuiltinTypeMethodBuilder(SemaRef, *this, "IncrementCounter", + SemaRef.getASTContext().UnsignedIntTy) + .callBuiltin("__builtin_hlsl_buffer_update_counter", + {getConstantIntExpr(1)}) + .finalizeMethod(); +} + +BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addDecrementCounterMethod() { + return BuiltinTypeMethodBuilder(SemaRef, *this, "DecrementCounter", + SemaRef.getASTContext().UnsignedIntTy) + .callBuiltin("__builtin_hlsl_buffer_update_counter", + {getConstantIntExpr(-1)}) + .finalizeMethod(); +} + HLSLExternalSemaSource::~HLSLExternalSemaSource() {} void HLSLExternalSemaSource::InitializeSema(Sema &S) { @@ -566,13 +774,13 @@ void HLSLExternalSemaSource::defineTrivialHLSLTypes() { static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S, ResourceClass RC, ResourceKind RK, bool IsROV, bool RawBuffer) { - return BuiltinTypeDeclBuilder(Decl) - .addHandleMember(S, RC, RK, IsROV, RawBuffer) - .addDefaultHandleConstructor(S); + return BuiltinTypeDeclBuilder(S, Decl) + .addHandleMember(RC, RK, IsROV, RawBuffer) + .addDefaultHandleConstructor(); } -Expr *constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc, - TemplateTypeParmDecl *T) { +static Expr *constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc, + TemplateTypeParmDecl *T) { ASTContext &Context = S.getASTContext(); // Obtain the QualType for 'unsigned long' @@ -592,7 +800,8 @@ Expr 
*constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc, return TypedResExpr; } -ConceptDecl *constructTypedBufferConceptDecl(Sema &S, NamespaceDecl *NSD) { +static ConceptDecl *constructTypedBufferConceptDecl(Sema &S, + NamespaceDecl *NSD) { ASTContext &Context = S.getASTContext(); DeclContext *DC = NSD->getDeclContext(); SourceLocation DeclLoc = SourceLocation(); @@ -636,8 +845,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { ConceptDecl *TypedBufferConcept = constructTypedBufferConceptDecl(*SemaPtr, HLSLNamespace); Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RWBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}, - TypedBufferConcept) + .addSimpleTemplateParams({"element_type"}, TypedBufferConcept) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { @@ -650,7 +858,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RasterizerOrderedBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}) + .addSimpleTemplateParams({"element_type"}) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, @@ -661,7 +869,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { }); Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "StructuredBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}) + .addSimpleTemplateParams({"element_type"}) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupBufferType(Decl, *SemaPtr, ResourceClass::SRV, ResourceKind::RawBuffer, @@ -671,18 +879,20 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { }); Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RWStructuredBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}) + .addSimpleTemplateParams({"element_type"}) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupBufferType(Decl, *SemaPtr, 
ResourceClass::UAV, ResourceKind::RawBuffer, /*IsROV=*/false, /*RawBuffer=*/true) .addArraySubscriptOperators() + .addIncrementCounterMethod() + .addDecrementCounterMethod() .completeDefinition(); }); Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "AppendStructuredBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}) + .addSimpleTemplateParams({"element_type"}) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer, @@ -692,7 +902,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "ConsumeStructuredBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}) + .addSimpleTemplateParams({"element_type"}) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer, @@ -702,19 +912,22 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RasterizerOrderedStructuredBuffer") - .addSimpleTemplateParams(*SemaPtr, {"element_type"}) + .addSimpleTemplateParams({"element_type"}) .Record; onCompletion(Decl, [this](CXXRecordDecl *Decl) { setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer, /*IsROV=*/true, /*RawBuffer=*/true) .addArraySubscriptOperators() + .addIncrementCounterMethod() + .addDecrementCounterMethod() .completeDefinition(); }); } void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record, CompletionFunction Fn) { - Completions.insert(std::make_pair(Record->getCanonicalDecl(), Fn)); + if (!Record->isCompleteDefinition()) + Completions.insert(std::make_pair(Record->getCanonicalDecl(), Fn)); } void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) { @@ -732,3 +945,19 @@ void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) { return; It->second(Record); } + +static FunctionDecl *lookupBuiltinFunction(Sema &S, 
StringRef Name) { + IdentifierInfo &II = + S.getASTContext().Idents.get(Name, tok::TokenKind::identifier); + DeclarationNameInfo NameInfo = + DeclarationNameInfo(DeclarationName(&II), SourceLocation()); + LookupResult R(S, NameInfo, Sema::LookupOrdinaryName); + // AllowBuiltinCreation is false but LookupDirect will create + // the builtin when searching the global scope anyways... + S.LookupName(R, S.getCurScope()); + // FIXME: If the builtin function was user-declared in global scope, + // this assert *will* fail. Should this call LookupBuiltin instead? + assert(R.isSingleResult() && + "Since this is a builtin it should always resolve!"); + return cast(R.getFoundDecl()); +} diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 6c7472ce92703..c9d7444d5865a 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -983,6 +983,9 @@ Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) { if (getLangOpts().MSVCCompat) return VAK_MSVCUndefined; + if (getLangOpts().HLSL && Ty->getAs()) + return VAK_Valid; + // FIXME: In C++11, these cases are conditionally-supported, meaning we're // permitted to reject them. We should consider doing so. 
return VAK_Undefined; diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 2bc93e4ec1181..8109c3a2cc0f1 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1696,7 +1696,17 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { return true; } -bool CheckArgTypeIsCorrect( +static bool CheckArgTypeMatches(Sema *S, Expr *Arg, QualType ExpectedType) { + QualType ArgType = Arg->getType(); + if (!S->getASTContext().hasSameUnqualifiedType(ArgType, ExpectedType)) { + S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible) + << ArgType << ExpectedType << 1 << 0 << 0; + return true; + } + return false; +} + +static bool CheckArgTypeIsCorrect( Sema *S, Expr *Arg, QualType ExpectedType, llvm::function_ref Check) { QualType PassedType = Arg->getType(); @@ -1711,7 +1721,7 @@ bool CheckArgTypeIsCorrect( return false; } -bool CheckAllArgTypesAreCorrect( +static bool CheckAllArgTypesAreCorrect( Sema *S, CallExpr *TheCall, QualType ExpectedType, llvm::function_ref Check) { for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) { @@ -1878,6 +1888,29 @@ static bool CheckVectorSelect(Sema *S, CallExpr *TheCall) { return false; } +static bool CheckResourceHandle( + Sema *S, CallExpr *TheCall, unsigned ArgIndex, + llvm::function_ref Check = + nullptr) { + assert(TheCall->getNumArgs() >= ArgIndex); + QualType ArgType = TheCall->getArg(ArgIndex)->getType(); + const HLSLAttributedResourceType *ResTy = + ArgType.getTypePtr()->getAs(); + if (!ResTy) { + S->Diag(TheCall->getArg(0)->getBeginLoc(), + diag::err_typecheck_expect_hlsl_resource) + << ArgType; + return true; + } + if (Check && Check(ResTy)) { + S->Diag(TheCall->getArg(ArgIndex)->getExprLoc(), + diag::err_invalid_hlsl_resource_type) + << ArgType; + return true; + } + return false; +} + // Note: returning true in this case results in CheckBuiltinFunctionCall // returning an ExprError bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr 
*TheCall) { @@ -2176,6 +2209,27 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_buffer_update_counter: { + auto checkResTy = [](const HLSLAttributedResourceType *ResTy) -> bool { + return !(ResTy->getAttrs().ResourceClass == ResourceClass::UAV && + ResTy->getAttrs().RawBuffer && ResTy->hasContainedType()); + }; + if (SemaRef.checkArgCount(TheCall, 2) || + CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy) || + CheckArgTypeMatches(&SemaRef, TheCall->getArg(1), + SemaRef.getASTContext().IntTy)) + return true; + Expr *OffsetExpr = TheCall->getArg(1); + std::optional Offset = + OffsetExpr->getIntegerConstantExpr(SemaRef.getASTContext()); + if (!Offset.has_value() || std::abs(Offset->getExtValue()) != 1) { + SemaRef.Diag(TheCall->getArg(1)->getBeginLoc(), + diag::err_hlsl_expect_arg_const_int_one_or_neg_one) + << 1; + return true; + } + break; + } } return false; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5d6b835e6da82..976d48e124913 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -11102,7 +11102,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFlushDirective(ArrayRef Clauses, for (const OMPClause *C : Clauses) { if (C->getClauseKind() == OMPC_acq_rel || C->getClauseKind() == OMPC_acquire || - C->getClauseKind() == OMPC_release) { + C->getClauseKind() == OMPC_release || + C->getClauseKind() == OMPC_seq_cst /*OpenMP 5.1*/) { if (MemOrderKind != OMPC_unknown) { Diag(C->getBeginLoc(), diag::err_omp_several_mem_order_clauses) << getOpenMPDirectiveName(OMPD_flush) << 1 diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 2ea2a368dd24c..86d15f6324f4f 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -1157,10 +1157,12 @@ ExprResult Sema::ActOnPackIndexingExpr(Scope *S, Expr *PackExpression, return Res; } -ExprResult 
-Sema::BuildPackIndexingExpr(Expr *PackExpression, SourceLocation EllipsisLoc, - Expr *IndexExpr, SourceLocation RSquareLoc, - ArrayRef ExpandedExprs, bool EmptyPack) { +ExprResult Sema::BuildPackIndexingExpr(Expr *PackExpression, + SourceLocation EllipsisLoc, + Expr *IndexExpr, + SourceLocation RSquareLoc, + ArrayRef ExpandedExprs, + bool FullySubstituted) { std::optional Index; if (!IndexExpr->isInstantiationDependent()) { @@ -1174,8 +1176,8 @@ Sema::BuildPackIndexingExpr(Expr *PackExpression, SourceLocation EllipsisLoc, IndexExpr = Res.get(); } - if (Index && (!ExpandedExprs.empty() || EmptyPack)) { - if (*Index < 0 || EmptyPack || *Index >= int64_t(ExpandedExprs.size())) { + if (Index && FullySubstituted) { + if (*Index < 0 || *Index >= int64_t(ExpandedExprs.size())) { Diag(PackExpression->getBeginLoc(), diag::err_pack_index_out_of_bound) << *Index << PackExpression << ExpandedExprs.size(); return ExprError(); @@ -1184,7 +1186,7 @@ Sema::BuildPackIndexingExpr(Expr *PackExpression, SourceLocation EllipsisLoc, return PackIndexingExpr::Create(getASTContext(), EllipsisLoc, RSquareLoc, PackExpression, IndexExpr, Index, - ExpandedExprs, EmptyPack); + ExpandedExprs, FullySubstituted); } TemplateArgumentLoc Sema::getTemplateArgumentPackExpansionPattern( diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1465bba87724b..9cf1b2d073a90 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3670,10 +3670,10 @@ class TreeTransform { SourceLocation RSquareLoc, Expr *PackIdExpression, Expr *IndexExpr, ArrayRef ExpandedExprs, - bool EmptyPack = false) { + bool FullySubstituted = false) { return getSema().BuildPackIndexingExpr(PackIdExpression, EllipsisLoc, IndexExpr, RSquareLoc, ExpandedExprs, - EmptyPack); + FullySubstituted); } /// Build a new expression representing a call to a source location @@ -6769,6 +6769,7 @@ TreeTransform::TransformPackIndexingType(TypeLocBuilder &TLB, if (Out.isNull()) return QualType(); 
SubtitutedTypes.push_back(Out); + FullySubstituted &= !Out->containsUnexpandedParameterPack(); } // If we're supposed to retain a pack expansion, do so by temporarily // forgetting the partially-substituted parameter pack. @@ -15581,6 +15582,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { } SmallVector ExpandedExprs; + bool FullySubstituted = true; if (!E->expandsToEmptyPack() && E->getExpressions().empty()) { Expr *Pattern = E->getPackIdExpression(); SmallVector Unexpanded; @@ -15605,7 +15607,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return ExprError(); return getDerived().RebuildPackIndexingExpr( E->getEllipsisLoc(), E->getRSquareLoc(), Pack.get(), IndexExpr.get(), - {}); + {}, /*FullySubstituted=*/false); } for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I); @@ -15617,6 +15619,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { OrigNumExpansions); if (Out.isInvalid()) return true; + FullySubstituted = false; } ExpandedExprs.push_back(Out.get()); } @@ -15633,6 +15636,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { OrigNumExpansions); if (Out.isInvalid()) return true; + FullySubstituted = false; ExpandedExprs.push_back(Out.get()); } } else if (!E->expandsToEmptyPack()) { @@ -15644,8 +15648,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return getDerived().RebuildPackIndexingExpr( E->getEllipsisLoc(), E->getRSquareLoc(), E->getPackIdExpression(), - IndexExpr.get(), ExpandedExprs, - /*EmptyPack=*/ExpandedExprs.size() == 0); + IndexExpr.get(), ExpandedExprs, FullySubstituted); } template diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index c39a1950a6cf2..731ad0b64dc85 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2191,7 +2191,7 @@ void ASTStmtReader::VisitSizeOfPackExpr(SizeOfPackExpr *E) 
{ void ASTStmtReader::VisitPackIndexingExpr(PackIndexingExpr *E) { VisitExpr(E); E->TransformedExpressions = Record.readInt(); - E->ExpandedToEmptyPack = Record.readInt(); + E->FullySubstituted = Record.readInt(); E->EllipsisLoc = readSourceLocation(); E->RSquareLoc = readSourceLocation(); E->SubExprs[0] = Record.readStmt(); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e7f567ff59a8a..4994047d9fe10 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2191,7 +2191,7 @@ void ASTStmtWriter::VisitSizeOfPackExpr(SizeOfPackExpr *E) { void ASTStmtWriter::VisitPackIndexingExpr(PackIndexingExpr *E) { VisitExpr(E); Record.push_back(E->TransformedExpressions); - Record.push_back(E->ExpandedToEmptyPack); + Record.push_back(E->FullySubstituted); Record.AddSourceLocation(E->getEllipsisLoc()); Record.AddSourceLocation(E->getRSquareLoc()); Record.AddStmt(E->getPackIdExpression()); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp index 9312bf0af16db..599c2179db0f0 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp @@ -114,7 +114,7 @@ class UncountedLambdaCapturesChecker if (!DRE) return; auto *MD = dyn_cast_or_null(DRE->getDecl()); - if (!MD || CE->getNumArgs() != 1) + if (!MD || CE->getNumArgs() < 1) return; auto *Arg = CE->getArg(0)->IgnoreParenCasts(); auto *ArgRef = dyn_cast(Arg); diff --git a/clang/test/AST/ByteCode/c23.c b/clang/test/AST/ByteCode/c23.c index f9157e40610cc..5154d57f6cb9e 100644 --- a/clang/test/AST/ByteCode/c23.c +++ b/clang/test/AST/ByteCode/c23.c @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -std=c23 -fexperimental-new-constant-interpreter -verify=expected,both %s // RUN: %clang_cc1 -std=c23 -verify=ref,both %s +// RUN: 
%clang_cc1 -std=c23 -triple=aarch64_be-linux-gnu -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c23 -triple=aarch64_be-linux-gnu -verify=ref,both %s + typedef typeof(nullptr) nullptr_t; @@ -23,5 +26,26 @@ char bar() { return ((struct S *)buffer)->c; } - static_assert((nullptr_t){} == 0); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define LITTLE_END 1 +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define LITTLE_END 0 +#else +# error "huh?" +#endif + +typedef unsigned char u8x4_t __attribute__((vector_size(4))); +constexpr u8x4_t arg1 = (u8x4_t)0xCAFEBABE; // okay +#if LITTLE_END +static_assert(arg1[0] == 190); +static_assert(arg1[1] == 186); +static_assert(arg1[2] == 254); +static_assert(arg1[3] == 202); +#else +static_assert(arg1[0] == 202); +static_assert(arg1[1] == 254); +static_assert(arg1[2] == 186); +static_assert(arg1[3] == 190); +#endif diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index 56f54ff168f3e..7a4fc89a27dac 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++2c -fcxx-exceptions -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c++2c -fcxx-exceptions -fexperimental-new-constant-interpreter -verify=expected,both %s -DBYTECODE // RUN: %clang_cc1 -std=c++2c -fcxx-exceptions -verify=ref,both %s namespace std { @@ -338,3 +338,17 @@ namespace PR48606 { } static_assert(f()); } + +#ifdef BYTECODE +constexpr int N = [] // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{assignment to dereferenced one-past-the-end pointer is not allowed in a constant expression}} \ + // expected-note {{in call to}} +{ + struct S { + int a[1]; + }; + S s; + ::new (s.a) int[1][2][3][4](); + return s.a[0]; +}(); +#endif diff --git a/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl 
b/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl index bef054a62e794..a1af001e2cad6 100644 --- a/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl +++ b/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl @@ -52,6 +52,32 @@ RWStructuredBuffer Buffer; // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWStructuredBuffer' lvalue implicit this // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline +// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <> IncrementCounter 'unsigned int ()' +// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> +// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> +// CHECK-NEXT: CallExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' +// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int (...) noexcept' Function 0x{{[0-9A-Fa-f]+}} '__builtin_hlsl_buffer_update_counter' 'unsigned int (...) noexcept' +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] +// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]' lvalue .__handle +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWStructuredBuffer' lvalue implicit this +// CHECK-NEXT: IntegerLiteral 0x{{[0-9A-Fa-f]+}} <> 'int' 1 +// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline + +// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <> DecrementCounter 'unsigned int ()' +// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> +// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> +// CHECK-NEXT: CallExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' +// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int (...) noexcept' Function 0x{{[0-9A-Fa-f]+}} '__builtin_hlsl_buffer_update_counter' 'unsigned int (...) 
noexcept' +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> '__hlsl_resource_t +// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] +// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] +// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]' lvalue .__handle +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWStructuredBuffer' lvalue implicit this +// CHECK-NEXT: IntegerLiteral 0x{{[0-9A-Fa-f]+}} <> 'int' -1 +// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline + // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <> class RWStructuredBuffer definition // CHECK: TemplateArgument type 'int' diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp index b63ffed8809fe..65eee9d49106d 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp @@ -125,7 +125,7 @@ void noescape_lambda() { } void lambda_capture_param(RefCountable* obj) { - auto someLambda = [&] { + auto someLambda = [&]() { obj->method(); }; someLambda(); @@ -178,3 +178,10 @@ void trivial_lambda() { }; trivial_lambda(); } + +void lambda_with_args(RefCountable* obj) { + auto trivial_lambda = [&](int v) { + obj->method(); + }; + trivial_lambda(1); +} diff --git a/clang/test/C/C23/n2412.c b/clang/test/C/C23/n2412.c new file mode 100644 index 0000000000000..7d4f32ae68a73 --- /dev/null +++ b/clang/test/C/C23/n2412.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -verify -std=c23 -ffreestanding %s + +/* WG14 N2412: Clang 14 + * Two's complement sign representation + */ +// expected-no-diagnostics + +#include + +// GH117348 -- BOOL_WIDTH was accidentally expanding to the number of bits in +// the object representation (8) rather than the number of bits in the value +// representation (1). +static_assert(BOOL_WIDTH == 1); + +// Validate the other macro requirements. 
+static_assert(CHAR_WIDTH == SCHAR_WIDTH); +static_assert(CHAR_WIDTH == UCHAR_WIDTH); +static_assert(CHAR_WIDTH == CHAR_BIT); + +static_assert(USHRT_WIDTH >= 16); +static_assert(UINT_WIDTH >= 16); +static_assert(ULONG_WIDTH >= 32); +static_assert(ULLONG_WIDTH >= 64); +static_assert(BITINT_MAXWIDTH >= ULLONG_WIDTH); + +static_assert(MB_LEN_MAX >= 1); + diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c index 628892d5809b4..6eb2fb2758738 100644 --- a/clang/test/CodeGen/attr-cpuspecific.c +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -154,6 +154,12 @@ void usages(void) { CpuSpecificNoDispatch(); // LINUX: @CpuSpecificNoDispatch.ifunc() // WINDOWS: @CpuSpecificNoDispatch() + // + // Adding another use of CpuSpecificNoDispatch reproduces the + // crash in https://github.com/llvm/llvm-project/issues/115299 + CpuSpecificNoDispatch(); + // LINUX: @CpuSpecificNoDispatch.ifunc() + // WINDOWS: @CpuSpecificNoDispatch() OrderDispatchUsageSpecific(); // LINUX: @OrderDispatchUsageSpecific.ifunc() // WINDOWS: @OrderDispatchUsageSpecific() diff --git a/clang/test/CodeGen/sanitize-coverage-gated-callbacks.c b/clang/test/CodeGen/sanitize-coverage-gated-callbacks.c index 9a00d91d5ad08..e226591d80d07 100644 --- a/clang/test/CodeGen/sanitize-coverage-gated-callbacks.c +++ b/clang/test/CodeGen/sanitize-coverage-gated-callbacks.c @@ -1,5 +1,7 @@ // RUN: %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc-guard -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o - | FileCheck %s --check-prefixes=CHECK,GATED // RUN: %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc-guard -mllvm -sanitizer-coverage-gated-trace-callbacks=0 -o - | FileCheck %s --check-prefixes=CHECK,PLAIN +// RUN: %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc-guard,trace-cmp -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o - | FileCheck %s --check-prefixes=CHECK,GATED,GATEDCMP +// RUN: 
%clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc-guard,trace-cmp -mllvm -sanitizer-coverage-gated-trace-callbacks=0 -o - | FileCheck %s --check-prefixes=CHECK,PLAIN,PLAINCMP // RUN: not %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=trace-pc -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o /dev/null 2>&1 | FileCheck %s --check-prefixes=INCOMPATIBLE // RUN: not %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=inline-8bit-counters -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o /dev/null 2>&1 | FileCheck %s --check-prefixes=INCOMPATIBLE // RUN: not %clang %s -target arm64-apple-darwin -emit-llvm -S -fsanitize-coverage=inline-bool-flag -mllvm -sanitizer-coverage-gated-trace-callbacks=1 -o /dev/null 2>&1 | FileCheck %s --check-prefixes=INCOMPATIBLE @@ -9,7 +11,7 @@ // PLAIN-NOT: section "__DATA,__sancov_gate" // Produce an error for all incompatible sanitizer coverage modes. -// INCOMPATIBLE: error: 'sanitizer-coverage-gated-trace-callbacks' is only supported with trace-pc-guard +// INCOMPATIBLE: error: 'sanitizer-coverage-gated-trace-callbacks' is only supported with trace-pc-guard or trace-cmp int x[10]; @@ -23,6 +25,11 @@ void foo(int n, int m) { // GATED-NEXT: br i1 [[CMP]], label %[[L_TRUE:.*]], label %[[L_FALSE:.*]], !prof [[WEIGHTS:!.+]] // GATED: [[L_TRUE]]: // GATED-NEXT: call void @__sanitizer_cov_trace_pc_guard + // COM: Check the trace-cmp instrumentation of the if (n) branch + // GATEDCMP: [[OPERAND:%.*]] = load i32, {{.*}} + // GATEDCMP-NEXT: br i1 [[CMP]], label %[[L_TRUE_1:.*]], label %[[L_FALSE_1:.*]] + // GATEDCMP: [[L_TRUE_1]]: + // GATEDCMP-NEXT: call void @__sanitizer_cov_trace_const_cmp4(i32 0, i32 [[OPERAND]]) // GATED: br i1 [[CMP]], label %[[L_TRUE_2:.*]], label %[[L_FALSE_2:.*]] // GATED: [[L_TRUE_2]]: // GATED-NEXT: call void @__sanitizer_cov_trace_pc_guard @@ -33,10 +40,12 @@ void foo(int n, int m) { // PLAIN-NOT: __sancov_should_track // But we should 
still be emitting the calls to the callback. // PLAIN: call void @__sanitizer_cov_trace_pc_guard + // PLAINCMP: [[OPERAND:%.*]] = load i32, {{.*}} + // PLAINCMP-NEXT: call void @__sanitizer_cov_trace_const_cmp4(i32 0, i32 [[OPERAND]]) if (n) { x[n] = 42; if (m) { x[m] = 41; } } -} +} \ No newline at end of file diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl new file mode 100644 index 0000000000000..128fff9b90a22 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV + +// NOTE: SPIRV codegen for resource methods is not yet implemented + +RWStructuredBuffer RWSB1 : register(u0); +RWStructuredBuffer RWSB2 : register(u1); + +// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), float } + +export void TestIncrementCounter() { + RWSB1.IncrementCounter(); +} + +// CHECK: define void @_Z20TestIncrementCounterv() +// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1) + +export void TestDecrementCounter() { + RWSB2.DecrementCounter(); +} + +// CHECK: define void @_Z20TestDecrementCounterv() +// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 -1) + +// CHECK: declare i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8) diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl new file mode 100644 index 
0000000000000..e895d30b54007 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV + +// NOTE: SPIRV codegen for resource methods is not yet implemented + +RWStructuredBuffer RWSB1, RWSB2; +RasterizerOrderedStructuredBuffer ROSB1, ROSB2; + +// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), float } + +export void TestIncrementCounter() { +// CHECK: define void @_Z20TestIncrementCounterv() +// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1) +// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i8 1) + RWSB1.IncrementCounter(); + ROSB1.IncrementCounter(); +} + +export void TestDecrementCounter() { +// CHECK: define void @_Z20TestDecrementCounterv() +// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 -1) +// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i8 -1) + RWSB2.DecrementCounter(); + ROSB2.DecrementCounter(); +} + +// CHECK: declare i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8) +// CHECK: declare i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1), i8) diff --git a/clang/test/CodeGenHLSL/resource-bindings.hlsl b/clang/test/CodeGenHLSL/resource-bindings.hlsl new file mode 100644 index 0000000000000..bfec90e1871f8 --- /dev/null +++ b/clang/test/CodeGenHLSL/resource-bindings.hlsl @@ -0,0 
+1,19 @@ +// RUN: %clang_cc1 -triple dxil--shadermodel6.6-compute -x hlsl -finclude-default-header -emit-llvm -o - %s | FileCheck %s + +// CHECK: define internal void @_init_resource_bindings() { + +// CHECK: %U0S0_h = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false) +RWBuffer U0S0 : register(u0); + +// CHECK: %U5S3_h = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0_0t(i32 3, i32 5, i32 1, i32 0, i1 false) +RWBuffer U5S3 : register(u5, space3); + +// CHECK: %T2S2_h = call target("dx.RawBuffer", i32, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_i32_0_0t(i32 2, i32 2, i32 1, i32 0, i1 false) +StructuredBuffer T2S2 : register(t2, space2); +struct S { + float4 f; + int i; +}; + +// CHECK: %T3S0_h = call target("dx.RawBuffer", %struct.S = type { <4 x float>, i32, [12 x i8] }, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_s_struct.Ss_0_0t(i32 0, i32 3, i32 1, i32 0, i1 false) +StructuredBuffer T3S0 : register(t3); diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 61cbf5e65d0d2..f739872685e78 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -89,7 +89,7 @@ // GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX942: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" @@ -101,17 +101,17 @@ // GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" -// GFX1100: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1150: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size"="true" -// GFX1103-W64: 
"target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" +// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" kernel void test() {} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl index 5db280f339e71..b8c46039dac53 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl @@ -7,12 +7,14 @@ typedef unsigned int uint; typedef half __attribute__((ext_vector_type(2))) half2; typedef short __attribute__((ext_vector_type(2))) short2; typedef unsigned short __attribute__((ext_vector_type(2))) ushort2; +typedef __bf16 __attribute__((ext_vector_type(2))) bfloat2; #pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void builtins_amdgcn_dl_insts_err( global float *fOut, global int *siOut, global uint *uiOut, global short *sOut, global int *iOut, global half *hOut, half2 v2hA, half2 v2hB, float fC, half hC, + bfloat2 v2bfbfA, bfloat2 v2bfbfB, short2 v2ssA, short2 v2ssB, short sC, int siA, int siB, int siC, ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC, int A, int B, int C) { @@ -23,8 +25,11 @@ kernel void builtins_amdgcn_dl_insts_err( sOut[0] = __builtin_amdgcn_fdot2_bf16_bf16(v2ssA, v2ssB, sC); // expected-error {{'__builtin_amdgcn_fdot2_bf16_bf16' needs target feature dot9-insts}} - fOut[3] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot9-insts}} - fOut[4] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, true); // expected-error 
{{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot9-insts}} + fOut[3] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot12-insts}} + fOut[4] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot12-insts}} + + fOut[3] = __builtin_amdgcn_fdot2c_f32_bf16(v2bfbfA, v2bfbfB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2c_f32_bf16' needs target feature dot13-insts}} + fOut[4] = __builtin_amdgcn_fdot2c_f32_bf16(v2bfbfA, v2bfbfB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2c_f32_bf16' needs target feature dot13-insts}} siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}} siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl index 86f4f73c81c0f..5b75ee417e545 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl @@ -11,6 +11,10 @@ // REQUIRES: amdgpu-registered-target typedef unsigned int uint; -void test_prng_b32(global uint* out, uint a) { +typedef unsigned int uint2 __attribute__((ext_vector_type(2))); + +void test(global uint* out, global uint2* out_v2u32, uint a, uint b) { *out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}} + *out_v2u32 = __builtin_amdgcn_permlane16_swap(a, b, false, false); // expected-error{{'__builtin_amdgcn_permlane16_swap' needs target feature permlane16-swap}} + *out_v2u32 = __builtin_amdgcn_permlane32_swap(a, b, false, false); // expected-error{{'__builtin_amdgcn_permlane32_swap' needs target feature permlane32-swap}} } 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-read-tr.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-read-tr.cl new file mode 100644 index 0000000000000..39fa46d5845f4 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-read-tr.cl @@ -0,0 +1,50 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v3i __attribute__((ext_vector_type(3))); +typedef short v4s __attribute__((ext_vector_type(4))); + +// GFX950-LABEL: define dso_local <2 x i32> @test_amdgcn_ds_read_b64_tr_b4_v2i32( +// GFX950-SAME: ptr addrspace(3) nocapture noundef readonly [[INPTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// GFX950-NEXT: entry: +// GFX950-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[INPTR]]) +// GFX950-NEXT: ret <2 x i32> [[TMP0]] +// +v2i test_amdgcn_ds_read_b64_tr_b4_v2i32(local v2i* inptr) +{ + return __builtin_amdgcn_ds_read_tr4_b64_v2i32(inptr); +} + +// GFX950-LABEL: define dso_local <3 x i32> @test_amdgcn_ds_read_b96_tr_b6_v3i32( +// GFX950-SAME: ptr addrspace(3) nocapture noundef readonly [[INPTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// GFX950-NEXT: entry: +// GFX950-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32(ptr addrspace(3) [[INPTR]]) +// GFX950-NEXT: ret <3 x i32> [[TMP0]] +// +v3i test_amdgcn_ds_read_b96_tr_b6_v3i32(local v3i* inptr) +{ + return __builtin_amdgcn_ds_read_tr6_b96_v3i32(inptr); +} + +// GFX950-LABEL: define dso_local <2 x i32> @test_amdgcn_ds_read_b64_tr_b8_v2i32( +// GFX950-SAME: ptr addrspace(3) nocapture noundef readonly [[INPTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// GFX950-NEXT: entry: +// GFX950-NEXT: [[TMP0:%.*]] = tail call <2 x i32> 
@llvm.amdgcn.ds.read.tr8.b64.v2i32(ptr addrspace(3) [[INPTR]]) +// GFX950-NEXT: ret <2 x i32> [[TMP0]] +// +v2i test_amdgcn_ds_read_b64_tr_b8_v2i32(local v2i* inptr) +{ + return __builtin_amdgcn_ds_read_tr8_b64_v2i32(inptr); +} + +// GFX950-LABEL: define dso_local <4 x i16> @test_amdgcn_ds_read_b64_tr_b16_v2i16( +// GFX950-SAME: ptr addrspace(3) nocapture noundef readonly [[INPTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// GFX950-NEXT: entry: +// GFX950-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.ds.read.tr16.b64.v4i16(ptr addrspace(3) [[INPTR]]) +// GFX950-NEXT: ret <4 x i16> [[TMP0]] +// +v4s test_amdgcn_ds_read_b64_tr_b16_v2i16(local v4s* inptr) +{ + return __builtin_amdgcn_ds_read_tr16_b64_v4i16(inptr); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl index f31ba85a52a7a..d2125e90bc2c8 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl @@ -3,6 +3,13 @@ // REQUIRES: amdgpu-registered-target typedef unsigned int uint; +typedef unsigned int __attribute__((ext_vector_type(2))) uint2; +typedef unsigned int __attribute__((ext_vector_type(6))) uint6; +typedef __bf16 __attribute__((ext_vector_type(32))) bfloat32; +typedef half __attribute__((ext_vector_type(32))) half32; +typedef short __attribute__((ext_vector_type(2))) short2; +typedef __bf16 __attribute__((ext_vector_type(2))) bfloat2; +typedef float __attribute__((ext_vector_type(16))) float16; // CHECK-LABEL: @test_prng_b32( // CHECK-NEXT: entry: @@ -19,3 +26,239 @@ typedef unsigned int uint; void test_prng_b32(global uint* out, uint a) { *out = __builtin_amdgcn_prng_b32(a); } + +// CHECK-LABEL: @test_permlane16_swap( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// 
CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP0]], i32 [[TMP1]], i1 false, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0 +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP8]], i32 [[TMP9]], i1 true, i1 false) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { i32, i32 } [[TMP10]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { i32, i32 } [[TMP10]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call { i32, i32 } 
@llvm.amdgcn.permlane16.swap(i32 [[TMP16]], i32 [[TMP17]], i1 false, i1 true) +// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i32 } [[TMP18]], 0 +// CHECK-NEXT: [[TMP20:%.*]] = extractvalue { i32, i32 } [[TMP18]], 1 +// CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i64 0 +// CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i64 1 +// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8 +// CHECK-NEXT: ret void +// +void test_permlane16_swap(global uint2* out, uint old, uint src) { + *out = __builtin_amdgcn_permlane16_swap(old, src, false, false); + *out = __builtin_amdgcn_permlane16_swap(old, src, true, false); + *out = __builtin_amdgcn_permlane16_swap(old, src, false, true); +} + +// CHECK-LABEL: @test_permlane32_swap( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 [[TMP0]], i32 [[TMP1]], i1 false, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0 +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1 +// 
CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 [[TMP8]], i32 [[TMP9]], i1 true, i1 false) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { i32, i32 } [[TMP10]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { i32, i32 } [[TMP10]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 [[TMP16]], i32 [[TMP17]], i1 false, i1 true) +// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i32 } [[TMP18]], 0 +// CHECK-NEXT: [[TMP20:%.*]] = extractvalue { i32, i32 } [[TMP18]], 1 +// CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i64 0 +// CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i64 1 +// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8 +// CHECK-NEXT: ret void +// +void test_permlane32_swap(global uint2* out, uint old, uint src) { + *out = __builtin_amdgcn_permlane32_swap(old, src, false, false); + *out = __builtin_amdgcn_permlane32_swap(old, src, true, false); + *out = __builtin_amdgcn_permlane32_swap(old, src, 
false, true); +} + +// CHECK-LABEL: @test_cvt_scalef32_pk( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT6_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRCBF32_ADDR:%.*]] = alloca <32 x bfloat>, align 64, addrspace(5) +// CHECK-NEXT: [[SRCH32_ADDR:%.*]] = alloca <32 x half>, align 64, addrspace(5) +// CHECK-NEXT: [[SRC0F32_ADDR:%.*]] = alloca <16 x float>, align 64, addrspace(5) +// CHECK-NEXT: [[SRC1F32_ADDR:%.*]] = alloca <16 x float>, align 64, addrspace(5) +// CHECK-NEXT: [[SCALE_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT6:%.*]], ptr addrspace(5) [[OUT6_ADDR]], align 8 +// CHECK-NEXT: store <32 x bfloat> [[SRCBF32:%.*]], ptr addrspace(5) [[SRCBF32_ADDR]], align 64 +// CHECK-NEXT: store <32 x half> [[SRCH32:%.*]], ptr addrspace(5) [[SRCH32_ADDR]], align 64 +// CHECK-NEXT: store <16 x float> [[SRC0F32:%.*]], ptr addrspace(5) [[SRC0F32_ADDR]], align 64 +// CHECK-NEXT: store <16 x float> [[SRC1F32:%.*]], ptr addrspace(5) [[SRC1F32_ADDR]], align 64 +// CHECK-NEXT: store float [[SCALE:%.*]], ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, ptr addrspace(5) [[SRCBF32_ADDR]], align 64 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> [[TMP0]], float [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8 +// CHECK-NEXT: store <6 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <32 x half>, ptr addrspace(5) [[SRCH32_ADDR]], align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> [[TMP4]], float [[TMP5]]) +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8 
+// CHECK-NEXT: store <6 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <32 x bfloat>, ptr addrspace(5) [[SRCBF32_ADDR]], align 64 +// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> [[TMP8]], float [[TMP9]]) +// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8 +// CHECK-NEXT: store <6 x i32> [[TMP10]], ptr addrspace(1) [[TMP11]], align 32 +// CHECK-NEXT: [[TMP12:%.*]] = load <32 x half>, ptr addrspace(5) [[SRCH32_ADDR]], align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load float, ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> [[TMP12]], float [[TMP13]]) +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8 +// CHECK-NEXT: store <6 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 32 +// CHECK-NEXT: [[TMP16:%.*]] = load <16 x float>, ptr addrspace(5) [[SRC0F32_ADDR]], align 64 +// CHECK-NEXT: [[TMP17:%.*]] = load <16 x float>, ptr addrspace(5) [[SRC1F32_ADDR]], align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load float, ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.bf6.f32(<16 x float> [[TMP16]], <16 x float> [[TMP17]], float [[TMP18]]) +// CHECK-NEXT: [[TMP20:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8 +// CHECK-NEXT: store <6 x i32> [[TMP19]], ptr addrspace(1) [[TMP20]], align 32 +// CHECK-NEXT: [[TMP21:%.*]] = load <16 x float>, ptr addrspace(5) [[SRC0F32_ADDR]], align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, ptr addrspace(5) [[SRC1F32_ADDR]], align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load float, ptr addrspace(5) [[SCALE_ADDR]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = call <6 x i32> 
@llvm.amdgcn.cvt.scalef32.2xpk16.fp6.f32(<16 x float> [[TMP21]], <16 x float> [[TMP22]], float [[TMP23]]) +// CHECK-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8 +// CHECK-NEXT: store <6 x i32> [[TMP24]], ptr addrspace(1) [[TMP25]], align 32 +// CHECK-NEXT: ret void +// +void test_cvt_scalef32_pk(global uint6 *out6, bfloat32 srcbf32, half32 srch32, float16 src0f32, float16 src1f32, float scale) +{ + *out6 = __builtin_amdgcn_cvt_scalef32_pk32_bf6_bf16(srcbf32, scale); + *out6 = __builtin_amdgcn_cvt_scalef32_pk32_bf6_f16(srch32, scale); + *out6 = __builtin_amdgcn_cvt_scalef32_pk32_fp6_bf16(srcbf32, scale); + *out6 = __builtin_amdgcn_cvt_scalef32_pk32_fp6_f16(srch32, scale); + *out6 = __builtin_amdgcn_cvt_scalef32_2xpk16_bf6_f32(src0f32, src1f32, scale); + *out6 = __builtin_amdgcn_cvt_scalef32_2xpk16_fp6_f32(src0f32, src1f32, scale); +} + +// CHECK-LABEL: @test_ashr_pk_i8_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr addrspace(5) [[SRC0_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr addrspace(5) [[SRC1_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC2:%.*]], ptr addrspace(5) [[SRC2_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SRC0_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[SRC2_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.ashr.pk.i8.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 +// CHECK-NEXT: 
[[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_ashr_pk_i8_i32(global int* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_ashr_pk_i8_i32(src0, src1, src2); +} + +// CHECK-LABEL: @test_ashr_pk_u8_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr addrspace(5) [[SRC0_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr addrspace(5) [[SRC1_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC2:%.*]], ptr addrspace(5) [[SRC2_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SRC0_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[SRC2_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.ashr.pk.u8.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_ashr_pk_u8_i32(global int* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_ashr_pk_u8_i32(src0, src1, src2); +} + +// CHECK-LABEL: @builtins_amdgcn_dl_insts( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: 
[[V2SSA_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5) +// CHECK-NEXT: [[V2SSB_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store float [[FC:%.*]], ptr addrspace(5) [[FC_ADDR]], align 4 +// CHECK-NEXT: store <2 x i16> [[V2SSA:%.*]], ptr addrspace(5) [[V2SSA_ADDR]], align 4 +// CHECK-NEXT: store <2 x i16> [[V2SSB:%.*]], ptr addrspace(5) [[V2SSB_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(5) [[V2SSA_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[TMP0]] to <2 x bfloat> +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr addrspace(5) [[V2SSB_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x bfloat> +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(5) [[FC_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> [[TMP1]], <2 x bfloat> [[TMP3]], float [[TMP4]], i1 false) +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store float [[TMP5]], ptr addrspace(1) [[TMP6]], align 4 +// CHECK-NEXT: ret void +// +void builtins_amdgcn_dl_insts(global float *out, float fC, short2 v2ssA, short2 v2ssB) { + *out = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, false); +} + +// CHECK-LABEL: @builtins_amdgcn_dl_dot2c( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V2SSA_ADDR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +// CHECK-NEXT: [[V2SSB_ADDR:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store float [[FC:%.*]], ptr addrspace(5) [[FC_ADDR]], align 4 +// CHECK-NEXT: store <2 x bfloat> [[V2SSA:%.*]], ptr 
addrspace(5) [[V2SSA_ADDR]], align 4 +// CHECK-NEXT: store <2 x bfloat> [[V2SSB:%.*]], ptr addrspace(5) [[V2SSB_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[V2SSA_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[V2SSB_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[FC_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.fdot2c.f32.bf16(<2 x bfloat> [[TMP0]], <2 x bfloat> [[TMP1]], float [[TMP2]], i1 false) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void builtins_amdgcn_dl_dot2c(global float *out, float fC, bfloat2 v2ssA, bfloat2 v2ssB) { + *out = __builtin_amdgcn_fdot2c_f32_bf16(v2ssA, v2ssB, fC, false); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 3bc6107b7fd40..c22a43146a8c8 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1,6 +1,6 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK-AMDGCN,CHECK %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefix=CHECK %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK,CHECK-SPIRV %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -866,7 +866,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK-LABEL test_wavefrontsize( unsigned test_wavefrontsize() { - // CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize() + // CHECK-AMDGCN: ret i32 {{[0-9]+}} + // CHECK-SPIRV: {{.*}}call{{.*}} i32 
@llvm.amdgcn.wavefrontsize() return __builtin_amdgcn_wavefrontsize(); } diff --git a/clang/test/Driver/cuda-no-threadsafe-statics.cu b/clang/test/Driver/cuda-no-threadsafe-statics.cu index eb15312f8f7d1..8730605f18828 100644 --- a/clang/test/Driver/cuda-no-threadsafe-statics.cu +++ b/clang/test/Driver/cuda-no-threadsafe-statics.cu @@ -2,7 +2,8 @@ // compilation only. // // RUN: %clang -### -x cuda --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 %s \ -// RUN: -nocudainc -nocudalib 2>&1 | FileCheck %s +// RUN: -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ +// RUN: 2>&1 | FileCheck %s // RUN: %clang -### -x hip --target=x86_64-linux-gnu -c --cuda-gpu-arch=gfx1010 %s \ // RUN: -nocudainc -nocudalib 2>&1 | FileCheck %s diff --git a/clang/test/Driver/hexagon-toolchain-linux.c b/clang/test/Driver/hexagon-toolchain-linux.c index 86cc9a30e932c..6f7f3b20f9141 100644 --- a/clang/test/Driver/hexagon-toolchain-linux.c +++ b/clang/test/Driver/hexagon-toolchain-linux.c @@ -11,7 +11,7 @@ // CHECK000-NOT: {{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crti.o // CHECK000: "-dynamic-linker={{/|\\\\}}lib{{/|\\\\}}ld-musl-hexagon.so.1" // CHECK000: "{{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crt1.o" -// CHECK000: "-lclang_rt.builtins-hexagon" "-lc" +// CHECK000: "-lc" "-lclang_rt.builtins-hexagon" // ----------------------------------------------------------------------------- // Passing --musl --shared // ----------------------------------------------------------------------------- @@ -21,7 +21,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_libcxx_tree -shared %s 2>&1 | FileCheck -check-prefix=CHECK001 %s // CHECK001-NOT: -dynamic-linker={{/|\\\\}}lib{{/|\\\\}}ld-musl-hexagon.so.1 // CHECK001: "{{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crti.o" -// CHECK001: "-lclang_rt.builtins-hexagon" "-lc" +// CHECK001: "-lc" "-lclang_rt.builtins-hexagon" // CHECK001-NOT: 
{{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crt1.o // ----------------------------------------------------------------------------- // Passing --musl -nostdlib @@ -33,8 +33,8 @@ // CHECK002: "-dynamic-linker={{/|\\\\}}lib{{/|\\\\}}ld-musl-hexagon.so.1" // CHECK002-NOT: {{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crti.o // CHECK002-NOT: {{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crt1.o -// CHECK002-NOT: "-lclang_rt.builtins-hexagon" // CHECK002-NOT: "-lc" +// CHECK002-NOT: "-lclang_rt.builtins-hexagon" // ----------------------------------------------------------------------------- // Passing --musl -nostartfiles // ----------------------------------------------------------------------------- @@ -45,7 +45,7 @@ // CHECK003: "-dynamic-linker={{/|\\\\}}lib{{/|\\\\}}ld-musl-hexagon.so.1" // CHECK003-NOT: {{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}Scrt1.o // CHECK003-NOT: {{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crt1.o -// CHECK003: "-lclang_rt.builtins-hexagon" "-lc" +// CHECK003: "-lc" "-lclang_rt.builtins-hexagon" // ----------------------------------------------------------------------------- // Passing --musl -nodefaultlibs // ----------------------------------------------------------------------------- @@ -55,8 +55,8 @@ // RUN: --sysroot=%S/Inputs/basic_linux_libcxx_tree -nodefaultlibs %s 2>&1 | FileCheck -check-prefix=CHECK004 %s // CHECK004: "-dynamic-linker={{/|\\\\}}lib{{/|\\\\}}ld-musl-hexagon.so.1" // CHECK004: "{{.*}}basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}crt1.o" -// CHECK004-NOT: "-lclang_rt.builtins-hexagon" // CHECK004-NOT: "-lc" +// CHECK004-NOT: "-lclang_rt.builtins-hexagon" // ----------------------------------------------------------------------------- // Passing --musl -nolibc // ----------------------------------------------------------------------------- diff --git a/clang/test/OpenMP/flush_ast_print.cpp 
b/clang/test/OpenMP/flush_ast_print.cpp index 9578ada020227..768282422032f 100644 --- a/clang/test/OpenMP/flush_ast_print.cpp +++ b/clang/test/OpenMP/flush_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -19,6 +19,7 @@ T tmain(T argc) { #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release +#pragma omp flush seq_cst #pragma omp flush(a) return a + argc; } @@ -27,18 +28,21 @@ T tmain(T argc) { // CHECK-NEXT: #pragma omp flush acq_rel{{$}} // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release{{$}} +// CHECK-NEXT: #pragma omp flush seq_cst{{$}} // CHECK-NEXT: #pragma omp flush (a) // CHECK: static int a; // CHECK-NEXT: #pragma omp flush // CHECK-NEXT: #pragma omp flush acq_rel{{$}} // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release{{$}} +// CHECK-NEXT: #pragma omp flush seq_cst{{$}} // CHECK-NEXT: #pragma omp flush (a) 
// CHECK: static char a; // CHECK-NEXT: #pragma omp flush // CHECK-NEXT: #pragma omp flush acq_rel{{$}} // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release{{$}} +// CHECK-NEXT: #pragma omp flush seq_cst{{$}} // CHECK-NEXT: #pragma omp flush (a) int main(int argc, char **argv) { @@ -48,11 +52,13 @@ int main(int argc, char **argv) { #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release +#pragma omp flush seq_cst #pragma omp flush(a) // CHECK-NEXT: #pragma omp flush // CHECK-NEXT: #pragma omp flush acq_rel // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release +// CHECK-NEXT: #pragma omp flush seq_cst // CHECK-NEXT: #pragma omp flush (a) return tmain(argc) + tmain(argv[0][0]) + a; } diff --git a/clang/test/OpenMP/flush_codegen.cpp b/clang/test/OpenMP/flush_codegen.cpp index c7dd88ef9ac31..fa2586d9fe258 100644 --- a/clang/test/OpenMP/flush_codegen.cpp +++ b/clang/test/OpenMP/flush_codegen.cpp @@ -1,13 +1,13 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 
-x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | 
FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER @@ -17,6 +17,7 @@ template T tmain(T argc) { static T a; #pragma omp flush +#pragma omp flush seq_cst #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release @@ -28,6 +29,7 @@ T tmain(T argc) { int main() { static int a; #pragma omp flush +#pragma omp flush seq_cst #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release @@ -37,6 +39,7 @@ int main() { // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) + // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) return tmain(a); // CHECK: call {{.*}} [[TMAIN:@.+]]( // CHECK: ret @@ -48,6 +51,7 @@ int main() { // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) +// CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: ret // CHECK-NOT: line: 0, diff --git a/clang/test/OpenMP/flush_messages.cpp b/clang/test/OpenMP/flush_messages.cpp index ad4830b5bf94f..e78949bc924e1 100644 --- a/clang/test/OpenMP/flush_messages.cpp +++ b/clang/test/OpenMP/flush_messages.cpp @@ -134,14 +134,12 @@ label1 : { #pragma omp flush(argc) flush(argc) // expected-warning {{extra tokens at the end of '#pragma omp flush' are ignored}} #pragma omp parallel flush(argc) // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} ; -#pragma omp flush seq_cst // expected-error {{unexpected OpenMP clause 'seq_cst' in directive '#pragma omp flush'}} #pragma omp flush acq_rel // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} #pragma omp flush acquire // omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} #pragma omp flush release // omp45-error {{unexpected 
OpenMP clause 'release' in directive '#pragma omp flush'}} #pragma omp flush relaxed // expected-error {{unexpected OpenMP clause 'relaxed' in directive '#pragma omp flush'}} -#pragma omp flush seq_cst // expected-error {{unexpected OpenMP clause 'seq_cst' in directive '#pragma omp flush'}} -#pragma omp flush acq_rel acquire // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'acq_rel' clause used here}} -#pragma omp flush release acquire // omp45-error {{unexpected OpenMP clause 'release' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'release' clause used here}} +#pragma omp flush acq_rel acquire // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'seq_cst', 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'acq_rel' clause used here}} +#pragma omp flush release acquire // omp45-error {{unexpected OpenMP clause 'release' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'seq_cst', 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'release' clause used here}} #pragma omp flush acq_rel (argc) // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} expected-warning {{extra tokens at the end of '#pragma omp flush' are ignored}} #pragma omp flush(argc) 
acq_rel // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp51-error {{'flush' directive with memory order clause 'acq_rel' cannot have the list}} omp51-note {{memory order clause 'acq_rel' is specified here}} return tmain(argc); diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c index c52c49a94e016..8ee6c6ba60af4 100644 --- a/clang/test/Preprocessor/init-aarch64.c +++ b/clang/test/Preprocessor/init-aarch64.c @@ -44,7 +44,7 @@ // AARCH64: #define __BIGGEST_ALIGNMENT__ 16 // AARCH64_BE-NEXT: #define __BIG_ENDIAN__ 1 // AARCH64-NEXT: #define __BITINT_MAXWIDTH__ 128 -// AARCH64-NEXT: #define __BOOL_WIDTH__ 8 +// AARCH64-NEXT: #define __BOOL_WIDTH__ 1 // AARCH64_BE-NEXT: #define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__ // AARCH64_LE-NEXT: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ // AARCH64-NEXT: #define __CHAR16_TYPE__ unsigned short @@ -785,7 +785,7 @@ // ARM64EC-MSVC: #define __ATOMIC_SEQ_CST 5 // ARM64EC-MSVC: #define __BIGGEST_ALIGNMENT__ 16 // ARM64EC-MSVC: #define __BITINT_MAXWIDTH__ 128 -// ARM64EC-MSVC: #define __BOOL_WIDTH__ 8 +// ARM64EC-MSVC: #define __BOOL_WIDTH__ 1 // ARM64EC-MSVC: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ // ARM64EC-MSVC: #define __CHAR16_TYPE__ unsigned short // ARM64EC-MSVC: #define __CHAR32_TYPE__ unsigned int diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 0eb6977a2553c..0e3320f01b328 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -25,7 +25,7 @@ // LA32-NEXT: #define __ATOMIC_SEQ_CST 5 // LA32: #define __BIGGEST_ALIGNMENT__ 16 // LA32: #define __BITINT_MAXWIDTH__ 128 -// LA32: #define __BOOL_WIDTH__ 8 +// LA32: #define __BOOL_WIDTH__ 1 // LA32: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ // LA32: #define __CHAR16_TYPE__ unsigned short // LA32: #define __CHAR32_TYPE__ unsigned int @@ -346,7 +346,7 @@ // LA64-NEXT: #define __ATOMIC_SEQ_CST 
5 // LA64: #define __BIGGEST_ALIGNMENT__ 16 // LA64: #define __BITINT_MAXWIDTH__ 128 -// LA64: #define __BOOL_WIDTH__ 8 +// LA64: #define __BOOL_WIDTH__ 1 // LA64: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ // LA64: #define __CHAR16_TYPE__ unsigned short // LA64: #define __CHAR32_TYPE__ unsigned int diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index c177975114332..05225c120b13d 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -1617,7 +1617,7 @@ // WEBASSEMBLY-NEXT:#define __ATOMIC_SEQ_CST 5 // WEBASSEMBLY-NEXT:#define __BIGGEST_ALIGNMENT__ 16 // WEBASSEMBLY-NEXT:#define __BITINT_MAXWIDTH__ 128 -// WEBASSEMBLY-NEXT:#define __BOOL_WIDTH__ 8 +// WEBASSEMBLY-NEXT:#define __BOOL_WIDTH__ 1 // WEBASSEMBLY-NEXT:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ // WEBASSEMBLY-NEXT:#define __CHAR16_TYPE__ unsigned short // WEBASSEMBLY-NEXT:#define __CHAR32_TYPE__ unsigned int diff --git a/clang/test/Sema/attr-target-version-unsupported.c b/clang/test/Sema/attr-target-version-unsupported.c new file mode 100644 index 0000000000000..7cf8172f5272e --- /dev/null +++ b/clang/test/Sema/attr-target-version-unsupported.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +//expected-warning@+1 {{unknown attribute 'target_version' ignored}} +int __attribute__((target_version("aes"))) foo(void) { return 3; } diff --git a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp index 962dbb8137f28..cb679a6c3ad87 100644 --- a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp +++ b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp @@ -271,3 +271,37 @@ void f() { } } // namespace GH105903 + +namespace GH116105 { + +template using pack_type = Ts...[Np]; + +template using pack_expr = decltype(Ts...[Np]); + +template struct types; + +template struct indices; + +template struct repack; + +template struct repack> { + template + using pack_type_alias = types...>; + + 
template + using pack_expr_alias = types...>; +}; + +template struct mdispatch_ { + using Idx = __make_integer_seq; + + static_assert(__is_same( + typename repack::template pack_type_alias, types)); + + static_assert(__is_same( + typename repack::template pack_expr_alias, types)); +}; + +mdispatch_ d; + +} // namespace GH116105 diff --git a/clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl new file mode 100644 index 0000000000000..4aa3ac183d3b1 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +// RWStructuredBuffer +using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]] [[hlsl::raw_buffer]]; +// RWBuffer +using bad_handle_not_raw_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]]; +// RWByteAddressBuffer +using bad_handle_no_type_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]]; +// StructuredBuffer +using bad_handle_not_uav_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(int)]] [[hlsl::raw_buffer]]; + +void test_args(int x, bool b) { + // expected-error@+1 {{too few arguments to function call, expected 2, have 1}} + __builtin_hlsl_buffer_update_counter(x); + + // expected-error@+1 {{too many arguments to function call, expected 2, have 3}} + __builtin_hlsl_buffer_update_counter(x, x, x); + + // expected-error@+1 {{used type 'int' where __hlsl_resource_t is required}} + __builtin_hlsl_buffer_update_counter(x, x); + + bad_handle_not_raw_t bad1; + bad_handle_no_type_t bad2; + bad_handle_not_uav_t bad3; + + // expected-error@+1 {{invalid __hlsl_resource_t type attributes}} + __builtin_hlsl_buffer_update_counter(bad1, 1); + + // expected-error@+1 {{invalid __hlsl_resource_t type attributes}} 
+ __builtin_hlsl_buffer_update_counter(bad2, 1); + + // expected-error@+1 {{invalid __hlsl_resource_t type attributes}} + __builtin_hlsl_buffer_update_counter(bad3, 1); + + handle_t res; + + // expected-error@+1 {{argument 1 must be constant integer 1 or -1}} + __builtin_hlsl_buffer_update_counter(res, x); + + // expected-error@+1 {{passing 'const char *' to parameter of incompatible type 'int'}} + __builtin_hlsl_buffer_update_counter(res, "1"); + + // expected-error@+1 {{argument 1 must be constant integer 1 or -1}} + __builtin_hlsl_buffer_update_counter(res, 10); + + // no error + __builtin_hlsl_buffer_update_counter(res, 1); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index b3b359a1e0c65..2f1d312da7786 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -148,3 +148,13 @@ void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, float *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} } + +void test_permlane16_swap(__global int* out, int old, int src, bool X) { + *out = __builtin_amdgcn_permlane16_swap(old, src, X, false); // expected-error{{argument to '__builtin_amdgcn_permlane16_swap' must be a constant integer}} + *out = __builtin_amdgcn_permlane16_swap(old, src, false, X); // expected-error{{argument to '__builtin_amdgcn_permlane16_swap' must be a constant integer}} +} + +void test_permlane32_swap(__global int* out, int old, int src, bool X) { + *out = __builtin_amdgcn_permlane32_swap(old, src, X, false); // expected-error{{argument to 
'__builtin_amdgcn_permlane32_swap' must be a constant integer}} + *out = __builtin_amdgcn_permlane32_swap(old, src, false, X); // expected-error{{argument to '__builtin_amdgcn_permlane32_swap' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 57523cf0af1b1..e0cde1d3ad87b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -27,7 +27,8 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, __global float4* out12, int4 a12, int8 b12, float4 c12, __global float16* out13, int4 a13, int8 b13, float16 c13, __global float4* out14, int8 a14, int8 b14, float4 c14, int d14, int e14, - __global float16* out15, int8 a15, int8 b15, float16 c15, int d15, int e15) { + __global float16* out15, int8 a15, int8 b15, float16 c15, int d15, int e15, + __global uint2* out16, int a16, int b16) { *out0 = __builtin_amdgcn_mfma_f32_16x16x32_f16(a0, b0, c0, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_16x16x32_f16' needs target feature gfx950-insts}} *out1 = __builtin_amdgcn_mfma_f32_32x32x16_f16(a1, b1, c1, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_32x32x16_f16' needs target feature gfx950-insts}} *out2 = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a2, b2, c2, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_32x32x16_bf16' needs target feature gfx950-insts}} @@ -50,4 +51,6 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' needs target feature gfx950-insts}} *out14 = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a14, b14, c14, 0, 0, 0, d14, 0, e14); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' needs target feature gfx950-insts}} *out15 = 
__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a15, b15, c15, 0, 0, 0, d15, 0, e15); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' needs target feature gfx950-insts}} + *out16 = __builtin_amdgcn_permlane16_swap(a16, b16, false, false); // expected-error{{'__builtin_amdgcn_permlane16_swap' needs target feature permlane16-swap}} + *out16 = __builtin_amdgcn_permlane32_swap(a16, b16, false, false); // expected-error{{'__builtin_amdgcn_permlane32_swap' needs target feature permlane32-swap}} } diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index cc735e4872592..5481bb6b87503 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -178,7 +178,7 @@ enum class WNoError { Unknown }; static cl::bits WNoErrorList( "Wno-error", - cl::desc("If set don't error out on the specified warning type."), + cl::desc("If set, don't error out on the specified warning type."), cl::values( clEnumValN(WNoError::Unknown, "unknown", "If set, unknown format options are only warned about.\n" diff --git a/clang/tools/clang-shlib/CMakeLists.txt b/clang/tools/clang-shlib/CMakeLists.txt index 2d97347ea7f82..31484ec49c773 100644 --- a/clang/tools/clang-shlib/CMakeLists.txt +++ b/clang/tools/clang-shlib/CMakeLists.txt @@ -48,13 +48,11 @@ add_clang_library(clang-cpp ${_OBJECTS} LINK_LIBS ${_DEPS}) -# AIX linker does not support version script -if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - configure_file(simple_version_script.map.in simple_version_script.map) - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - target_link_options(clang-cpp PRIVATE LINKER:--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) - endif() +configure_file(simple_version_script.map.in simple_version_script.map) + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_options(clang-cpp PRIVATE LINKER:--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) endif() # Optimize function 
calls for default visibility definitions to avoid PLT and diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index e1ae1770e8ebe..9db3187ac44e7 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2600,16 +2600,20 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown); // Module headers. - Tokens = Annotate("module x();\nendmodule"); + Tokens = Annotate("module x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_VerilogMultiLineListLParen); - Tokens = Annotate("function automatic `x x();\nendmodule"); + Tokens = Annotate("function automatic `x x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 10u) << Tokens; EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_VerilogMultiLineListLParen); - Tokens = Annotate("function automatic x``x x();\nendmodule"); + Tokens = Annotate("function automatic x``x x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen); - Tokens = Annotate("function automatic x::x x();\nendmodule"); + Tokens = Annotate("function automatic x::x x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen); } diff --git a/clang/utils/TableGen/ClangASTNodesEmitter.cpp b/clang/utils/TableGen/ClangASTNodesEmitter.cpp index 16749d1183624..5971b0012305d 100644 --- a/clang/utils/TableGen/ClangASTNodesEmitter.cpp +++ b/clang/utils/TableGen/ClangASTNodesEmitter.cpp @@ -207,8 +207,9 @@ void clang::EmitClangASTNodes(const RecordKeeper &RK, raw_ostream &OS, ClangASTNodesEmitter(RK, N, S, PriorizeIfSubclassOf).run(OS); } -void printDeclContext(const std::multimap &Tree, - const Record *DeclContext, raw_ostream &OS) { +static void +printDeclContext(const std::multimap &Tree, + const Record 
*DeclContext, raw_ostream &OS) { if (!DeclContext->getValueAsBit(AbstractFieldName)) OS << "DECL_CONTEXT(" << DeclContext->getName() << ")\n"; auto [II, E] = Tree.equal_range(DeclContext); diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 4aa7594ffa6eb..534bf2d01d795 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -1821,9 +1821,9 @@ CreateSemanticSpellings(const std::vector &Spellings, return Ret; } -void WriteSemanticSpellingSwitch(StringRef VarName, - const SemanticSpellingMap &Map, - raw_ostream &OS) { +static void WriteSemanticSpellingSwitch(StringRef VarName, + const SemanticSpellingMap &Map, + raw_ostream &OS) { OS << " switch (" << VarName << ") {\n default: " << "llvm_unreachable(\"Unknown spelling list index\");\n"; for (const auto &I : Map) @@ -2367,12 +2367,12 @@ template static void forEachSpelling(const Record &Attr, Fn &&F) { } } -std::map> NameToAttrsMap; +static std::map> NameToAttrsMap; /// Build a map from the attribute name to the Attrs that use that name. If more /// than one Attr use a name, the arguments could be different so a more complex /// check is needed in the generated switch. 
-void generateNameToAttrsMap(const RecordKeeper &Records) { +static void generateNameToAttrsMap(const RecordKeeper &Records) { for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { for (const FlattenedSpelling &S : GetFlattenedSpellings(*A)) { auto [It, Inserted] = NameToAttrsMap.try_emplace(S.name()); @@ -3965,9 +3965,9 @@ void EmitClangAttrASTVisitor(const RecordKeeper &Records, raw_ostream &OS) { OS << "#endif // ATTR_VISITOR_DECLS_ONLY\n"; } -void EmitClangAttrTemplateInstantiateHelper(ArrayRef Attrs, - raw_ostream &OS, - bool AppliesToDecl) { +static void +EmitClangAttrTemplateInstantiateHelper(ArrayRef Attrs, + raw_ostream &OS, bool AppliesToDecl) { OS << " switch (At->getKind()) {\n"; for (const auto *Attr : Attrs) { @@ -4622,7 +4622,7 @@ static bool isParamExpr(const Record *Arg) { .Default(false); } -void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) { +static void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) { OS << "bool isParamExpr(size_t N) const override {\n"; OS << " return "; auto Args = Attr.getValueAsListOfDefs("Args"); @@ -4633,8 +4633,8 @@ void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) { OS << "}\n\n"; } -void GenerateHandleAttrWithDelayedArgs(const RecordKeeper &Records, - raw_ostream &OS) { +static void GenerateHandleAttrWithDelayedArgs(const RecordKeeper &Records, + raw_ostream &OS) { OS << "static void handleAttrWithDelayedArgs(Sema &S, Decl *D, "; OS << "const ParsedAttr &Attr) {\n"; OS << " SmallVector ArgExprs;\n"; diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index d59cbbbbec1b5..da01cf6ceab59 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -225,6 +225,32 @@

C++2c implementation status

P0963R3 No + + + constexpr structured bindings + P2686R5 + No + + + Allowing exception throwing in constant-evaluation + P3068R6 + No + + + Remove Deprecated Array Comparisons from C++26 + P2865R6 + No + + + Structured Bindings can introduce a Pack + P1061R10 + No + + + The Oxford variadic comma + P3176R1 + No + diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index 77261f631ea11..3a6762320f447 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -790,6 +790,7 @@ function(configure_compiler_rt_lit_site_cfg input output) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_TEST_COMPILER ${COMPILER_RT_TEST_COMPILER}) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_OUTPUT_DIR ${COMPILER_RT_OUTPUT_DIR}) + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR ${COMPILER_RT_EXEC_OUTPUT_DIR}) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR ${output_dir}) configure_lit_site_cfg(${input} ${output}) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index c8595b97b337d..42d197f2b08d0 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -296,6 +296,7 @@ endif () # long double is not 80 bits on Android or MSVC. set(x86_80_BIT_SOURCES divxc3.c + extendhfxf2.c extendxftf2.c fixxfdi.c fixxfti.c diff --git a/compiler-rt/lib/builtins/extendhfxf2.c b/compiler-rt/lib/builtins/extendhfxf2.c new file mode 100644 index 0000000000000..a2cd106e1c1b3 --- /dev/null +++ b/compiler-rt/lib/builtins/extendhfxf2.c @@ -0,0 +1,16 @@ +//===-- lib/extendhfxf2.c - half -> long double conversion --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "int_lib.h" +#define SRC_HALF +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +// Long double are expected to be as precise as double. +COMPILER_RT_ABI xf_float __extendhfxf2(src_t a) { + return (xf_float)__extendXfYf2__(a); +} diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index ac81beee11a39..8b8ce1abe906f 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -816,6 +816,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { // mov rax, QWORD PTR [rip + XXXXXXXX] case 0x058d48: // 48 8d 05 XX XX XX XX : // lea rax, QWORD PTR [rip + XXXXXXXX] + case 0x0d8948: // 48 89 0d XX XX XX XX : + // mov QWORD PTR [rip + XXXXXXXX], rcx + case 0x158948: // 48 89 15 XX XX XX XX : + // mov QWORD PTR [rip + XXXXXXXX], rdx case 0x25ff48: // 48 ff 25 XX XX XX XX : // rex.W jmp QWORD PTR [rip + XXXXXXXX] case 0x158D4C: // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX] diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h b/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h index 0749f633b4bcf..1664b92b21369 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h @@ -120,7 +120,7 @@ class DeadlockDetectorTLS { u32 lock; u32 stk; }; - LockWithContext all_locks_with_contexts_[64]; + LockWithContext all_locks_with_contexts_[128]; uptr n_all_locks_; }; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 5a2d39cd30607..c83efec8eaca2 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -673,7 +673,8 @@ void CheckUnwind() { thr->ignore_reads_and_writes++; 
atomic_store_relaxed(&thr->in_signal_handler, 0); #endif - PrintCurrentStackSlow(StackTrace::GetCurrentPc()); + PrintCurrentStack(StackTrace::GetCurrentPc(), + common_flags()->fast_unwind_on_fatal); } bool is_initialized; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index f48be8e0a4fe0..49bee9c67d303 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -514,7 +514,7 @@ bool IsExpectedReport(uptr addr, uptr size); StackID CurrentStackId(ThreadState *thr, uptr pc); ReportStack *SymbolizeStackId(StackID stack_id); void PrintCurrentStack(ThreadState *thr, uptr pc); -void PrintCurrentStackSlow(uptr pc); // uses libunwind +void PrintCurrentStack(uptr pc, bool fast); // may uses libunwind MBlock *JavaHeapBlock(uptr addr, uptr *start); void Initialize(ThreadState *thr); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp index 0311df553fdd0..51a98e2f2d5e7 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp @@ -828,18 +828,18 @@ void PrintCurrentStack(ThreadState *thr, uptr pc) { PrintStack(SymbolizeStack(trace)); } -// Always inlining PrintCurrentStackSlow, because LocatePcInTrace assumes +// Always inlining PrintCurrentStack, because LocatePcInTrace assumes // __sanitizer_print_stack_trace exists in the actual unwinded stack, but -// tail-call to PrintCurrentStackSlow breaks this assumption because +// tail-call to PrintCurrentStack breaks this assumption because // __sanitizer_print_stack_trace disappears after tail-call. // However, this solution is not reliable enough, please see dvyukov's comment // http://reviews.llvm.org/D19148#406208 // Also see PR27280 comment 2 and 3 for breaking examples and analysis. 
-ALWAYS_INLINE USED void PrintCurrentStackSlow(uptr pc) { +ALWAYS_INLINE USED void PrintCurrentStack(uptr pc, bool fast) { #if !SANITIZER_GO uptr bp = GET_CURRENT_FRAME(); auto *ptrace = New(); - ptrace->Unwind(pc, bp, nullptr, false); + ptrace->Unwind(pc, bp, nullptr, fast); for (uptr i = 0; i < ptrace->size / 2; i++) { uptr tmp = ptrace->trace_buffer[i]; @@ -857,6 +857,6 @@ using namespace __tsan; extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() { - PrintCurrentStackSlow(StackTrace::GetCurrentPc()); + PrintCurrentStack(StackTrace::GetCurrentPc(), false); } } // extern "C" diff --git a/compiler-rt/test/builtins/Unit/extendhfxf2_test.c b/compiler-rt/test/builtins/Unit/extendhfxf2_test.c new file mode 100644 index 0000000000000..80e6f78cdd9c4 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/extendhfxf2_test.c @@ -0,0 +1,73 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_extendhfxf2 + +#include +#include // for isnan, isinf +#include + +#include "int_lib.h" + +#if HAS_80_BIT_LONG_DOUBLE && defined(COMPILER_RT_HAS_FLOAT16) + +long double __extendhfxf2(_Float16 f); + +int test_extendhfxf2(_Float16 a, long double expected) { + long double x = __extendhfxf2(a); + __uint16_t *b = (void *)&a; + int ret = !((isnan(x) && isnan(expected)) || x == expected); + if (ret) { + printf("error in test__extendhfxf2(%#.4x) = %.20Lf, " + "expected %.20Lf\n", + *b, x, expected); + } + return ret; +} + +char assumption_1[sizeof(_Float16) * CHAR_BIT == 16] = {0}; + +int main() { + // Small positive value + if (test_extendhfxf2(0.09997558593750000000f, 0.09997558593750000000L)) + return 1; + + // Small negative value + if (test_extendhfxf2(-0.09997558593750000000f, -0.09997558593750000000L)) + return 1; + + // Zero + if (test_extendhfxf2(0.0f, 0.0L)) + return 1; + + // Smallest positive non-zero value + if (test_extendhfxf2(0x1p-16f, 0x1p-16L)) + return 1; + + // Smallest negative non-zero value + if 
(test_extendhfxf2(-0x1p-16f, -0x1p-16L)) + return 1; + + // Positive infinity + if (test_extendhfxf2(__builtin_huge_valf16(), __builtin_huge_valf64x())) + return 1; + + // Negative infinity + if (test_extendhfxf2(-__builtin_huge_valf16(), + (long double)-__builtin_huge_valf64x())) + return 1; + + // NaN + if (test_extendhfxf2(__builtin_nanf16(""), + (long double)__builtin_nanf64x(""))) + return 1; + + return 0; +} + +#else + +int main() { + printf("skipped\n"); + return 0; +} + +#endif diff --git a/compiler-rt/test/hwasan/lit.cfg.py b/compiler-rt/test/hwasan/lit.cfg.py index 594f3294a84ac..bbf23e683240a 100644 --- a/compiler-rt/test/hwasan/lit.cfg.py +++ b/compiler-rt/test/hwasan/lit.cfg.py @@ -2,6 +2,9 @@ import os +from lit.llvm import llvm_config +from lit.llvm.subst import ToolSubst, FindTool + # Setup config name. config.name = "HWAddressSanitizer" + getattr(config, "name_suffix", "default") @@ -74,6 +77,12 @@ def build_invocation(compile_flags): ("%env_hwasan_opts=", "env HWASAN_OPTIONS=" + default_hwasan_opts_str) ) +# Ensure that we can use hwasan_symbolize from the expected location +llvm_config.add_tool_substitutions( + [ToolSubst("hwasan_symbolize", unresolved="fatal")], + search_dirs=[config.compiler_rt_bindir], +) + # Default test suffixes. 
config.suffixes = [".c", ".cpp"] diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index 66935c358afed..050792b6b2621 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -28,6 +28,7 @@ set_default("python_executable", "@Python3_EXECUTABLE@") set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@) set_default("compiler_rt_intercept_libdispatch", @COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL@) set_default("compiler_rt_output_dir", "@COMPILER_RT_RESOLVED_OUTPUT_DIR@") +set_default("compiler_rt_bindir", "@COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR@") set_default("compiler_rt_libdir", "@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@") set_default("emulator", "@COMPILER_RT_EMULATOR@") set_default("asan_shadow_scale", "@COMPILER_RT_ASAN_SHADOW_SCALE@") diff --git a/compiler-rt/test/tsan/many_held_mutex.cpp b/compiler-rt/test/tsan/many_held_mutex.cpp new file mode 100644 index 0000000000000..76e072b35a233 --- /dev/null +++ b/compiler-rt/test/tsan/many_held_mutex.cpp @@ -0,0 +1,21 @@ +// RUN: %clangxx_tsan -O1 %s %link_libcxx_tsan -fsanitize=thread -o %t +// RUN: %run %t 128 + +#include +#include +#include + +int main(int argc, char *argv[]) { + int num_of_mtx = std::atoi(argv[1]); + + std::vector mutexes(num_of_mtx); + + for (auto &mu : mutexes) { + mu.lock(); + } + for (auto &mu : mutexes) { + mu.unlock(); + } + + return 0; +} diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index 7e024dc387516..6ae92acf20608 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -498,8 +498,7 @@ struct NodeVisitor { READ_FEATURE(OmpLinearModifier::Value) READ_FEATURE(OmpLoopDirective) READ_FEATURE(OmpMapClause) - READ_FEATURE(OmpMapClause::TypeModifier) - READ_FEATURE(OmpMapClause::Type) + READ_FEATURE(OmpMapClause::Modifier) READ_FEATURE(OmpNumTasksClause) 
READ_FEATURE(OmpNumTasksClause::Prescriptiveness) READ_FEATURE(OmpObject) diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp index a3d9b0cfdc79b..5bd8c76199278 100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -229,8 +229,8 @@ void OpenMPCounterVisitor::Post(const OmpTaskDependenceType::Value &c) { clauseDetails += "type=" + std::string{OmpTaskDependenceType::EnumToString(c)} + ";"; } -void OpenMPCounterVisitor::Post(const OmpMapClause::Type &c) { - clauseDetails += "type=" + std::string{OmpMapClause::EnumToString(c)} + ";"; +void OpenMPCounterVisitor::Post(const OmpMapType::Value &c) { + clauseDetails += "type=" + std::string{OmpMapType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpScheduleClause::Kind &c) { clauseDetails += diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h index 86f206ba85c6d..7e9ae94bef297 100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h @@ -75,7 +75,7 @@ struct OpenMPCounterVisitor { void Post(const OmpLinearModifier::Value &c); void Post(const OmpOrderingModifier::Value &c); void Post(const OmpTaskDependenceType::Value &c); - void Post(const OmpMapClause::Type &c); + void Post(const OmpMapType::Value &c); void Post(const OmpScheduleClause::Kind &c); void Post(const OmpIfClause::DirectiveNameModifier &c); void Post(const OmpCancelType::Type &c); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 6d1e7329d5cce..68f9406dc2830 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -476,6 +476,11 @@ class ParseTreeDumper { NODE(parser, NullInit) NODE(parser, ObjectDecl) NODE(parser, OldParameterStmt) + 
NODE(parser, OmpMapper) + NODE(parser, OmpMapType) + NODE_ENUM(OmpMapType, Value) + NODE(parser, OmpMapTypeModifier) + NODE_ENUM(OmpMapTypeModifier, Value) NODE(parser, OmpIteratorSpecifier) NODE(parser, OmpIterator) NODE(parser, OmpAffinityClause) @@ -536,7 +541,9 @@ class ParseTreeDumper { NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) NODE(parser, OmpFromClause) - NODE_ENUM(OmpFromClause, Expectation) + NODE(OmpFromClause, Modifier) + NODE(parser, OmpExpectation) + NODE_ENUM(OmpExpectation, Value) NODE(parser, OmpIfClause) NODE_ENUM(OmpIfClause, DirectiveNameModifier) NODE_ENUM(OmpLastprivateClause, LastprivateModifier) @@ -548,9 +555,7 @@ class ParseTreeDumper { NODE_ENUM(OmpLinearModifier, Value) NODE(parser, OmpLoopDirective) NODE(parser, OmpMapClause) - NODE(parser, OmpMapperIdentifier) - NODE_ENUM(OmpMapClause, TypeModifier) - NODE_ENUM(OmpMapClause, Type) + NODE(OmpMapClause, Modifier) static std::string GetNodeName(const llvm::omp::Clause &x) { return llvm::Twine( "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x)) @@ -601,8 +606,7 @@ class ParseTreeDumper { NODE(parser, OmpSectionsDirective) NODE(parser, OmpSimpleStandaloneDirective) NODE(parser, OmpToClause) - // No NODE_ENUM for OmpToClause::Expectation, because it's an alias - // for OmpFromClause::Expectation. 
+ NODE(OmpToClause, Modifier) NODE(parser, Only) NODE(parser, OpenACCAtomicConstruct) NODE(parser, OpenACCBlockConstruct) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index de179f47be8fc..8d7119a56b7f8 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3502,6 +3502,21 @@ struct OmpDependenceType { WRAPPER_CLASS_BOILERPLATE(OmpDependenceType, Value); }; +// Ref: [5.1:205-209], [5.2:166-168] +// +// motion-modifier -> +// PRESENT | // since 5.0, until 5.0 +// mapper | iterator +// expectation -> +// PRESENT // since 5.1 +// +// The PRESENT value was a part of motion-modifier in 5.1, and became a +// value of expectation in 5.2. +struct OmpExpectation { + ENUM_CLASS(Value, Present); + WRAPPER_CLASS_BOILERPLATE(OmpExpectation, Value); +}; + // Ref: [5.0:47-49], [5.1:49-51], [5.2:67-69] // // iterator-modifier -> @@ -3519,6 +3534,34 @@ struct OmpLinearModifier { WRAPPER_CLASS_BOILERPLATE(OmpLinearModifier, Value); }; +// Ref: [5.0:176-180], [5.1:205-210], [5.2:149-150] +// +// mapper -> +// identifier // since 4.5 +struct OmpMapper { + WRAPPER_CLASS_BOILERPLATE(OmpMapper, Name); +}; + +// Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] +// +// map-type -> +// ALLOC | DELETE | FROM | RELEASE | TO | TOFROM // since 4.5 +struct OmpMapType { + ENUM_CLASS(Value, Alloc, Delete, From, Release, To, Tofrom); + WRAPPER_CLASS_BOILERPLATE(OmpMapType, Value); +}; + +// Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] +// +// map-type-modifier -> +// ALWAYS | // since 4.5 +// CLOSE | // since 5.0 +// PRESENT // since 5.1 +struct OmpMapTypeModifier { + ENUM_CLASS(Value, Always, Close, Present, Ompx_Hold) + WRAPPER_CLASS_BOILERPLATE(OmpMapTypeModifier, Value); +}; + // Ref: [4.5:56-63], [5.0:101-109], [5.1:126-133], [5.2:252-254] // // modifier -> @@ -3546,10 +3589,10 @@ struct OmpOrderModifier { // Ref: [4.5:201-207], [5.0:293-299], [5.1:325-331], 
[5.2:124] // // reduction-identifier -> -// base-language-identifier | // since 4.5 -// - | // since 4.5, until 5.2 -// + | * | .AND. | .OR. | .EQV. | .NEQV. | // since 4.5 -// MIN | MAX | IAND | IOR | IEOR // since 4.5 +// base-language-identifier | // since 4.5 +// - | // since 4.5, until 5.2 +// + | * | .AND. | .OR. | .EQV. | .NEQV. | // since 4.5 +// MIN | MAX | IAND | IOR | IEOR // since 4.5 struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; @@ -3558,7 +3601,7 @@ struct OmpReductionIdentifier { // Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] // // reduction-modifier -> -// DEFAULT | INSCAN | TASK // since 5.0 +// DEFAULT | INSCAN | TASK // since 5.0 struct OmpReductionModifier { ENUM_CLASS(Value, Default, Inscan, Task); WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); @@ -3578,9 +3621,9 @@ struct OmpTaskDependenceType { // Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] // // variable-category -> -// SCALAR | // since 4.5 -// AGGREGATE | ALLOCATABLE | POINTER | // since 5.0 -// ALL // since 5.2 +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER | // since 5.0 +// ALL // since 5.2 struct OmpVariableCategory { ENUM_CLASS(Value, Aggregate, All, Allocatable, Pointer, Scalar) WRAPPER_CLASS_BOILERPLATE(OmpVariableCategory, Value); @@ -3723,15 +3766,9 @@ struct OmpDeviceTypeClause { // motion-modifier -> // PRESENT | mapper-modifier | iterator-modifier struct OmpFromClause { - ENUM_CLASS(Expectation, Present); TUPLE_CLASS_BOILERPLATE(OmpFromClause); - - // As in the case of MAP, modifiers are parsed as lists, even if they - // are unique. These restrictions will be checked in semantic checks. - std::tuple>, - std::optional>, OmpObjectList, - bool> // were the modifiers comma-separated? 
- t; + MODIFIER_BOILERPLATE(OmpExpectation, OmpIterator, OmpMapper); + std::tuple t; }; // OMP 5.2 12.6.1 grainsize-clause -> grainsize ([prescriptiveness :] value) @@ -3794,31 +3831,19 @@ struct OmpLinearClause { std::variant u; }; -WRAPPER_CLASS(OmpMapperIdentifier, std::optional); - -// 2.15.5.1 map -> -// MAP ([MAPPER(mapper-identifier)] [[map-type-modifier-list [,]] -// [iterator-modifier [,]] map-type : ] -// variable-name-list) -// map-type-modifier-list -> map-type-modifier [,] [...] -// map-type-modifier -> ALWAYS | CLOSE | PRESENT | OMPX_HOLD -// map-type -> TO | FROM | TOFROM | ALLOC | RELEASE | DELETE +// Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] +// +// map-clause -> +// MAP([modifier...:] locator-list) // since 4.5 +// modifier -> +// map-type-modifier | // since 4.5 +// mapper | // since 5.0 +// iterator | // since 5.1 +// map-type // since 4.5 struct OmpMapClause { - ENUM_CLASS(TypeModifier, Always, Close, Present, Ompx_Hold); - ENUM_CLASS(Type, To, From, Tofrom, Alloc, Release, Delete) TUPLE_CLASS_BOILERPLATE(OmpMapClause); - - // All modifiers are parsed into optional lists, even if they are unique. - // The checks for satisfying those constraints are deferred to semantics. - // In OpenMP 5.2 the non-comma syntax has been deprecated: keep the - // information about separator presence to emit a diagnostic if needed. - std::tuple>, - std::optional>, // unique - std::optional>, // unique - OmpObjectList, - bool> // were the modifiers comma-separated? 
- t; + MODIFIER_BOILERPLATE(OmpMapTypeModifier, OmpMapper, OmpIterator, OmpMapType); + std::tuple t; }; // Ref: [5.0:101-109], [5.1:126-134], [5.2:233-234] @@ -3869,23 +3894,17 @@ struct OmpScheduleClause { // Ref: [4.5:107-109], [5.0:176-180], [5.1:205-210], [5.2:167-168] // // to-clause (in DECLARE TARGET) -> -// TO(extended-list) | // until 5.1 +// TO(extended-list) | // until 5.1 // to-clause (in TARGET UPDATE) -> // TO(locator-list) | -// TO(mapper-modifier: locator-list) | // since 5.0 -// TO(motion-modifier[,] ...: locator-list) // since 5.1 -// motion-modifier -> +// TO(mapper-modifier: locator-list) | // since 5.0 +// TO(motion-modifier[,] ...: locator-list) // since 5.1 +// motion-modifier -> // PRESENT | mapper-modifier | iterator-modifier struct OmpToClause { - using Expectation = OmpFromClause::Expectation; TUPLE_CLASS_BOILERPLATE(OmpToClause); - - // As in the case of MAP, modifiers are parsed as lists, even if they - // are unique. These restrictions will be checked in semantic checks. - std::tuple>, - std::optional>, OmpObjectList, - bool> // were the modifiers comma-separated? 
- t; + MODIFIER_BOILERPLATE(OmpExpectation, OmpIterator, OmpMapper); + std::tuple t; }; // OMP 5.2 12.6.2 num_tasks-clause -> num_tasks ([prescriptiveness :] value) @@ -3897,8 +3916,10 @@ struct OmpNumTasksClause { // Ref: [5.0:254-255], [5.1:287-288], [5.2:321-322] // -// update-clause -> UPDATE(dependence-type) // since 5.0, until 5.1 -// update-clause -> UPDATE(task-dependence-type) // since 5.2 +// update-clause -> +// UPDATE(dependence-type) // since 5.0, until 5.1 +// update-clause -> +// UPDATE(task-dependence-type) // since 5.2 struct OmpUpdateClause { UNION_CLASS_BOILERPLATE(OmpUpdateClause); std::variant u; diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h index e986ad910a3f3..e2156281d1b2b 100644 --- a/flang/include/flang/Runtime/CUDA/allocatable.h +++ b/flang/include/flang/Runtime/CUDA/allocatable.h @@ -16,9 +16,28 @@ namespace Fortran::runtime::cuda { extern "C" { +/// Perform allocation of the descriptor. +int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); + /// Perform allocation of the descriptor with synchronization of it when /// necessary. -int RTDECL(CUFAllocatableAllocate)(Descriptor &, bool hasStat = false, +int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); + +/// Perform allocation of the descriptor without synchronization. Assign data +/// from source. +int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, + const Descriptor &source, int64_t stream = -1, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); + +/// Perform allocation of the descriptor with synchronization of it when +/// necessary. Assign data from source. 
+int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, + const Descriptor &source, int64_t stream = -1, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); diff --git a/flang/include/flang/Runtime/CUDA/memmove-function.h b/flang/include/flang/Runtime/CUDA/memmove-function.h new file mode 100644 index 0000000000000..74d6a05eff4c9 --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/memmove-function.h @@ -0,0 +1,23 @@ +//===-- include/flang/Runtime/CUDA/memmove-function.h -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#ifndef FORTRAN_RUNTIME_CUDA_MEMMOVE_FUNCTION_H_ +#define FORTRAN_RUNTIME_CUDA_MEMMOVE_FUNCTION_H_ + +namespace Fortran::runtime::cuda { + +void *MemmoveHostToDevice(void *dst, const void *src, std::size_t count); + +void *MemmoveDeviceToHost(void *dst, const void *src, std::size_t count); + +void *MemmoveDeviceToDevice(void *dst, const void *src, std::size_t count); + +} // namespace Fortran::runtime::cuda +#endif // FORTRAN_RUNTIME_CUDA_MEMMOVE_FUNCTION_H_ diff --git a/flang/include/flang/Semantics/openmp-modifiers.h b/flang/include/flang/Semantics/openmp-modifiers.h index beab4c9b46a21..60f116e6f0033 100644 --- a/flang/include/flang/Semantics/openmp-modifiers.h +++ b/flang/include/flang/Semantics/openmp-modifiers.h @@ -10,6 +10,7 @@ #define FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ #include "flang/Common/enum-set.h" +#include "flang/Parser/characters.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" #include "llvm/ADT/STLExtras.h" @@ -18,6 +19,7 @@ #include #include +#include #include #include @@ -51,6 +53,7 @@ struct OmpModifierDescriptor { // Modifier name for use in 
diagnostic messages. const OmpProperties &props(unsigned version) const; const OmpClauses &clauses(unsigned version) const; + unsigned since(llvm::omp::Clause id) const; const llvm::StringRef name; // Version-dependent properties of the modifier. @@ -61,26 +64,25 @@ struct OmpModifierDescriptor { template const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); -template <> -const OmpModifierDescriptor &OmpGetDescriptor(); +#define DECLARE_DESCRIPTOR(name) \ + template <> const OmpModifierDescriptor &OmpGetDescriptor() + +DECLARE_DESCRIPTOR(parser::OmpChunkModifier); +DECLARE_DESCRIPTOR(parser::OmpDependenceType); +DECLARE_DESCRIPTOR(parser::OmpExpectation); +DECLARE_DESCRIPTOR(parser::OmpIterator); +DECLARE_DESCRIPTOR(parser::OmpLinearModifier); +DECLARE_DESCRIPTOR(parser::OmpMapper); +DECLARE_DESCRIPTOR(parser::OmpMapType); +DECLARE_DESCRIPTOR(parser::OmpMapTypeModifier); +DECLARE_DESCRIPTOR(parser::OmpOrderModifier); +DECLARE_DESCRIPTOR(parser::OmpOrderingModifier); +DECLARE_DESCRIPTOR(parser::OmpReductionIdentifier); +DECLARE_DESCRIPTOR(parser::OmpReductionModifier); +DECLARE_DESCRIPTOR(parser::OmpTaskDependenceType); +DECLARE_DESCRIPTOR(parser::OmpVariableCategory); + +#undef DECLARE_DESCRIPTOR // Explanation of terminology: // @@ -94,7 +96,7 @@ const OmpModifierDescriptor &OmpGetDescriptor(); // std::tuple>, ...> t; // }; // -// The Speficic1, etc. 
refer to parser classes that represent modifiers, +// The Specific1, etc. refer to parser classes that represent modifiers, // e.g. OmpIterator or OmpTaskDependenceType. The Variant type contains // all modifiers that are allowed for a given clause. The Modifier class // is there to wrap the variant into the form that the parse tree visitor @@ -148,39 +150,110 @@ typename std::list::const_iterator findInRange( } } // namespace detail -/// Finds the entry in the list that holds the `SpecificTy` alternative, +/// Finds the first entry in the list that holds the `SpecificTy` alternative, /// and returns the pointer to that alternative. If such an entry does not /// exist, it returns nullptr. -/// The list is assumed to contain at most one such item, with a check -/// whether the condition is met. -/// This function should only be called after the verification of modifier -/// properties has been performed, since it will assert if multiple items -/// are found. template const SpecificTy *OmpGetUniqueModifier( const std::optional> &modifiers) { const SpecificTy *found{nullptr}; if (modifiers) { auto end{modifiers->cend()}; - // typename std::list::iterator end{modifiers->end()}; auto at{detail::findInRange(modifiers->cbegin(), end)}; if (at != end) { found = &std::get(at->u); -#ifndef NDEBUG - auto another{ - detail::findInRange(std::next(at), end)}; - assert(another == end && "repeated modifier"); -#endif } } return found; } +template struct OmpSpecificModifierIterator { + using VectorTy = std::vector; + OmpSpecificModifierIterator( + std::shared_ptr list, typename VectorTy::const_iterator where) + : specificList(list), at(where) {} + + OmpSpecificModifierIterator &operator++() { + ++at; + return *this; + } + // OmpSpecificModifierIterator &operator++(int); + OmpSpecificModifierIterator &operator--() { + --at; + return *this; + } + // OmpSpecificModifierIterator &operator--(int); + + const SpecificTy *operator*() const { return *at; } + bool operator==(const 
OmpSpecificModifierIterator &other) const { + assert(specificList.get() == other.specificList.get() && + "comparing unrelated iterators"); + return at == other.at; + } + bool operator!=(const OmpSpecificModifierIterator &other) const { + return !(*this == other); + } + +private: + std::shared_ptr specificList; + typename VectorTy::const_iterator at; +}; + +template +llvm::iterator_range> +OmpGetRepeatableModifier(const std::optional> &modifiers) { + using VectorTy = std::vector; + std::shared_ptr items(new VectorTy); + if (modifiers) { + for (auto &m : *modifiers) { + if (auto *s = std::get_if(&m.u)) { + items->push_back(s); + } + } + } + return llvm::iterator_range( + OmpSpecificModifierIterator(items, items->begin()), + OmpSpecificModifierIterator(items, items->end())); +} + +template +llvm::iterator_range> +OmpGetRepeatableModifier(std::optional> &&) = delete; + namespace detail { template constexpr const T *make_nullptr() { return static_cast(nullptr); } +/// Verify that all modifiers are allowed in the given OpenMP version. +template +bool verifyVersions(const std::optional> &modifiers, + llvm::omp::Clause id, parser::CharBlock clauseSource, + SemanticsContext &semaCtx) { + if (!modifiers) { + return true; + } + unsigned version{semaCtx.langOptions().OpenMPVersion}; + bool result{true}; + for (auto &m : *modifiers) { + const OmpModifierDescriptor &desc{OmpGetDescriptor(m)}; + unsigned since{desc.since(id)}; + if (since == ~0u) { + // This shouldn't really happen, but have it just in case. 
+ semaCtx.Say(m.source, + "'%s' modifier is not supported on %s clause"_err_en_US, + desc.name.str(), + parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(id))); + } else if (version < since) { + semaCtx.Say(m.source, + "'%s' modifier is not supported in OpenMP v%d.%d, try -fopenmp-version=%d"_warn_en_US, + desc.name.str(), version / 10, version % 10, since); + result = false; + } + } + return result; +} + /// Helper function for verifying the Required property: /// For a specific SpecificTy, if SpecificTy is has the Required property, /// check if the list has an item that holds SpecificTy as an alternative. @@ -201,7 +274,7 @@ bool verifyIfRequired(const SpecificTy *, }); if (!present) { semaCtx.Say( - clauseSource, "A %s modifier is required"_err_en_US, desc.name.str()); + clauseSource, "'%s' modifier is required"_err_en_US, desc.name.str()); } return present; } @@ -224,7 +297,8 @@ bool verifyRequiredPack(const std::optional> &modifiers, /// list is valid, or false otherwise. template bool verifyRequired(const std::optional> &modifiers, - parser::CharBlock clauseSource, SemanticsContext &semaCtx) { + llvm::omp::Clause id, parser::CharBlock clauseSource, + SemanticsContext &semaCtx) { using VariantTy = typename UnionTy::Variant; return verifyRequiredPack(modifiers, clauseSource, semaCtx, std::make_index_sequence>{}); @@ -253,7 +327,8 @@ bool verifyIfUnique(const SpecificTy *, auto next{ detail::findInRange(std::next(specific), end)}; if (next != end) { - semaCtx.Say(next->source, "A %s cannot occur multiple times"_err_en_US, + semaCtx.Say(next->source, + "'%s' modifier cannot occur multiple times"_err_en_US, desc.name.str()); } } @@ -264,7 +339,8 @@ bool verifyIfUnique(const SpecificTy *, /// list is valid, or false otherwise. 
template bool verifyUnique(const std::optional> &modifiers, - parser::CharBlock clauseSource, SemanticsContext &semaCtx) { + llvm::omp::Clause id, parser::CharBlock clauseSource, + SemanticsContext &semaCtx) { if (!modifiers) { return true; } @@ -284,7 +360,8 @@ bool verifyUnique(const std::optional> &modifiers, /// list is valid, or false otherwise. template bool verifyUltimate(const std::optional> &modifiers, - parser::CharBlock clauseSource, SemanticsContext &semaCtx) { + llvm::omp::Clause id, parser::CharBlock clauseSource, + SemanticsContext &semaCtx) { if (!modifiers || modifiers->size() <= 1) { return true; } @@ -314,8 +391,8 @@ bool verifyUltimate(const std::optional> &modifiers, } llvm::StringRef where{isPre ? "last" : "first"}; semaCtx.Say(it->source, - "The %s should be the %s modifier"_err_en_US, - desc.name.str(), where.str()); + "'%s' should be the %s modifier"_err_en_US, desc.name.str(), + where.str()); return false; } return true; @@ -330,7 +407,8 @@ bool verifyUltimate(const std::optional> &modifiers, /// list is valid, or false otherwise. 
template bool verifyExclusive(const std::optional> &modifiers, - parser::CharBlock clauseSource, SemanticsContext &semaCtx) { + llvm::omp::Clause id, parser::CharBlock clauseSource, + SemanticsContext &semaCtx) { if (!modifiers || modifiers->size() <= 1) { return true; } @@ -345,11 +423,11 @@ bool verifyExclusive(const std::optional> &modifiers, const OmpModifierDescriptor &descExcl{OmpGetDescriptor(excl)}; const OmpModifierDescriptor &descOther{OmpGetDescriptor(other)}; parser::MessageFormattedText txt( - "An exclusive %s cannot be specified together with a modifier of a different type"_err_en_US, + "An exclusive '%s' modifier cannot be specified together with a modifier of a different type"_err_en_US, descExcl.name.str()); parser::Message message(excl.source, txt); message.Attach( - other.source, "%s provided here"_en_US, descOther.name.str()); + other.source, "'%s' provided here"_en_US, descOther.name.str()); semaCtx.Say(std::move(message)); }}; @@ -387,14 +465,16 @@ bool verifyExclusive(const std::optional> &modifiers, } // namespace detail template -bool OmpVerifyModifiers(const ClauseTy &clause, parser::CharBlock clauseSource, - SemanticsContext &semaCtx) { +bool OmpVerifyModifiers(const ClauseTy &clause, llvm::omp::Clause id, + parser::CharBlock clauseSource, SemanticsContext &semaCtx) { auto &modifiers{OmpGetModifiers(clause)}; - bool result{detail::verifyRequired(modifiers, clauseSource, semaCtx)}; - result = detail::verifyUnique(modifiers, clauseSource, semaCtx) && result; - result = detail::verifyUltimate(modifiers, clauseSource, semaCtx) && result; - result = detail::verifyExclusive(modifiers, clauseSource, semaCtx) && result; - return result; + bool results[]{// + detail::verifyVersions(modifiers, id, clauseSource, semaCtx), + detail::verifyRequired(modifiers, id, clauseSource, semaCtx), + detail::verifyUnique(modifiers, id, clauseSource, semaCtx), + detail::verifyUltimate(modifiers, id, clauseSource, semaCtx), + detail::verifyExclusive(modifiers, id, 
clauseSource, semaCtx)}; + return llvm::all_of(results, [](bool x) { return x; }); } } // namespace Fortran::semantics diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 0f2e849c2c6a0..6baa22a44eafb 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1000,7 +1000,7 @@ bool ClauseProcessor::processMap( const parser::CharBlock &source) { using Map = omp::clause::Map; mlir::Location clauseLocation = converter.genLocation(source); - const auto &mapType = std::get>(clause.t); + const auto &[mapType, typeMods, mappers, iterator, objects] = clause.t; llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; // If the map type is specified, then process it else Tofrom is the @@ -1029,13 +1029,11 @@ bool ClauseProcessor::processMap( mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; } - auto &modTypeMods = - std::get>(clause.t); - if (modTypeMods) { - if (llvm::is_contained(*modTypeMods, Map::MapTypeModifier::Always)) + if (typeMods) { + if (llvm::is_contained(*typeMods, Map::MapTypeModifier::Always)) mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; // Diagnose unimplemented map-type-modifiers. 
- if (llvm::any_of(*modTypeMods, [](Map::MapTypeModifier m) { + if (llvm::any_of(*typeMods, [](Map::MapTypeModifier m) { return m != Map::MapTypeModifier::Always; })) { TODO(currentLocation, "Map type modifiers (other than 'ALWAYS')" @@ -1043,10 +1041,14 @@ bool ClauseProcessor::processMap( } } - if (std::get>(clause.t)) { + if (iterator) { TODO(currentLocation, "Support for iterator modifiers is not implemented yet"); } + if (mappers) { + TODO(currentLocation, + "Support for mapper modifiers is not implemented yet"); + } processMapObjects(stmtCtx, clauseLocation, std::get(clause.t), mapTypeBits, diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 8639d08827f4e..bf20f42bdecaf 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -584,7 +584,7 @@ Defaultmap make(const parser::OmpClause::Defaultmap &inp, // clang-format on ); - auto &mods{semantics::OmpGetModifiers(inp.v)}; + auto &mods = semantics::OmpGetModifiers(inp.v); auto &t0 = std::get(inp.v.t); auto *t1 = semantics::OmpGetUniqueModifier(mods); @@ -764,37 +764,35 @@ Firstprivate make(const parser::OmpClause::Firstprivate &inp, From make(const parser::OmpClause::From &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpFromClause - using wrapped = parser::OmpFromClause; - CLAUSET_ENUM_CONVERT( // - convert, parser::OmpFromClause::Expectation, From::Expectation, + convert, parser::OmpExpectation::Value, From::Expectation, // clang-format off MS(Present, Present) // clang-format on ); - auto &t0 = std::get>>(inp.v.t); - auto &t1 = std::get>>(inp.v.t); - auto &t2 = std::get(inp.v.t); - - assert((!t0 || t0->size() == 1) && "Only one expectation modifier allowed"); - assert((!t1 || t1->size() == 1) && "Only one iterator modifier allowed"); + auto &mods = semantics::OmpGetModifiers(inp.v); + auto *t0 = semantics::OmpGetUniqueModifier(mods); + auto *t1 = semantics::OmpGetUniqueModifier(mods); + auto *t2 = 
semantics::OmpGetUniqueModifier(mods); + auto &t3 = std::get(inp.v.t); - auto expectation = [&]() -> std::optional { - if (t0) - return convert(t0->front()); + auto mappers = [&]() -> std::optional> { + if (t1) + return List{Mapper{makeObject(t1->v, semaCtx)}}; return std::nullopt; }(); auto iterator = [&]() -> std::optional { - if (t1) - return makeIterator(t1->front(), semaCtx); + if (t2) + return makeIterator(*t2, semaCtx); return std::nullopt; }(); - return From{{/*Expectation=*/std::move(expectation), /*Mapper=*/std::nullopt, + return From{{/*Expectation=*/maybeApplyToV(convert, t0), + /*Mappers=*/std::move(mappers), /*Iterator=*/std::move(iterator), - /*LocatorList=*/makeObjects(t2, semaCtx)}}; + /*LocatorList=*/makeObjects(t3, semaCtx)}}; } // Full: empty @@ -963,10 +961,8 @@ Link make(const parser::OmpClause::Link &inp, Map make(const parser::OmpClause::Map &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpMapClause - using wrapped = parser::OmpMapClause; - CLAUSET_ENUM_CONVERT( // - convert1, parser::OmpMapClause::Type, Map::MapType, + convert1, parser::OmpMapType::Value, Map::MapType, // clang-format off MS(Alloc, Alloc) MS(Delete, Delete) @@ -978,7 +974,7 @@ Map make(const parser::OmpClause::Map &inp, ); CLAUSET_ENUM_CONVERT( // - convert2, parser::OmpMapClause::TypeModifier, Map::MapTypeModifier, + convert2, parser::OmpMapTypeModifier::Value, Map::MapTypeModifier, // clang-format off MS(Always, Always) MS(Close, Close) @@ -987,42 +983,43 @@ Map make(const parser::OmpClause::Map &inp, // clang-format on ); - auto &t0 = std::get>>(inp.v.t); - auto &t1 = std::get>>(inp.v.t); - auto &t2 = std::get>>(inp.v.t); - auto &t3 = std::get(inp.v.t); - auto &t4 = std::get(inp.v.t); - - if (t4.v) - TODO_NOLOC("OmpMapClause(MAPPER(...)): user defined mapper not supported"); + auto &mods = semantics::OmpGetModifiers(inp.v); + auto *t1 = semantics::OmpGetUniqueModifier(mods); + auto *t2 = semantics::OmpGetUniqueModifier(mods); + auto *t3 = 
semantics::OmpGetUniqueModifier(mods); + auto &t4 = std::get(inp.v.t); - // These should have been diagnosed already. - assert((!t1 || t1->size() == 1) && "Only one iterator modifier is allowed"); - assert((!t2 || t2->size() == 1) && "Only one map type is allowed"); + auto mappers = [&]() -> std::optional> { + if (t1) + return List{Mapper{makeObject(t1->v, semaCtx)}}; + return std::nullopt; + }(); auto iterator = [&]() -> std::optional { - if (t1) - return makeIterator(t1->front(), semaCtx); + if (t2) + return makeIterator(*t2, semaCtx); return std::nullopt; }(); - std::optional maybeType; - if (t2) - maybeType = maybeApply(convert1, std::optional(t2->front())); + auto type = [&]() -> std::optional { + if (t3) + return convert1(t3->v); + return Map::MapType::Tofrom; + }(); - std::optional maybeTypeMods = maybeApply( - [&](const std::list &typeMods) { - Map::MapTypeModifiers mods; - for (wrapped::TypeModifier mod : typeMods) - mods.push_back(convert2(mod)); - return mods; - }, - t0); + Map::MapTypeModifiers typeMods; + for (auto *typeMod : + semantics::OmpGetRepeatableModifier(mods)) { + typeMods.push_back(convert2(typeMod->v)); + } + std::optional maybeTypeMods{}; + if (!typeMods.empty()) + maybeTypeMods = std::move(typeMods); - return Map{{/*MapType=*/maybeType, - /*MapTypeModifiers=*/maybeTypeMods, - /*Mapper=*/std::nullopt, /*Iterator=*/std::move(iterator), - /*LocatorList=*/makeObjects(t3, semaCtx)}}; + return Map{{/*MapType=*/std::move(type), + /*MapTypeModifiers=*/std::move(maybeTypeMods), + /*Mapper=*/std::move(mappers), /*Iterator=*/std::move(iterator), + /*LocatorList=*/makeObjects(t4, semaCtx)}}; } // Match: incomplete @@ -1316,37 +1313,35 @@ ThreadLimit make(const parser::OmpClause::ThreadLimit &inp, To make(const parser::OmpClause::To &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpToClause - using wrapped = parser::OmpToClause; - CLAUSET_ENUM_CONVERT( // - convert, parser::OmpToClause::Expectation, To::Expectation, + convert, 
parser::OmpExpectation::Value, To::Expectation, // clang-format off MS(Present, Present) // clang-format on ); - auto &t0 = std::get>>(inp.v.t); - auto &t1 = std::get>>(inp.v.t); - auto &t2 = std::get(inp.v.t); - - assert((!t0 || t0->size() == 1) && "Only one expectation modifier allowed"); - assert((!t1 || t1->size() == 1) && "Only one iterator modifier allowed"); + auto &mods = semantics::OmpGetModifiers(inp.v); + auto *t0 = semantics::OmpGetUniqueModifier(mods); + auto *t1 = semantics::OmpGetUniqueModifier(mods); + auto *t2 = semantics::OmpGetUniqueModifier(mods); + auto &t3 = std::get(inp.v.t); - auto expectation = [&]() -> std::optional { - if (t0) - return convert(t0->front()); + auto mappers = [&]() -> std::optional> { + if (t1) + return List{Mapper{makeObject(t1->v, semaCtx)}}; return std::nullopt; }(); auto iterator = [&]() -> std::optional { - if (t1) - return makeIterator(t1->front(), semaCtx); + if (t2) + return makeIterator(*t2, semaCtx); return std::nullopt; }(); - return To{{/*Expectation=*/std::move(expectation), /*Mapper=*/std::nullopt, + return To{{/*Expectation=*/maybeApplyToV(convert, t0), + /*Mappers=*/{std::move(mappers)}, /*Iterator=*/std::move(iterator), - /*LocatorList=*/makeObjects(t2, semaCtx)}}; + /*LocatorList=*/makeObjects(t3, semaCtx)}}; } // UnifiedAddress: empty diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index 9c74404801bbc..5fac5c2271c3b 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -168,6 +168,7 @@ std::optional getBaseObject(const Object &object, namespace clause { using Range = tomp::type::RangeT; +using Mapper = tomp::type::MapperT; using Iterator = tomp::type::IteratorT; using IteratorSpecifier = tomp::type::IteratorSpecifierT; using DefinedOperator = tomp::type::DefinedOperatorT; diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp index 9ec055b1aecab..9ec1655d92f44 100644 --- 
a/flang/lib/Optimizer/CodeGen/Target.cpp +++ b/flang/lib/Optimizer/CodeGen/Target.cpp @@ -825,6 +825,97 @@ struct TargetAArch64 : public GenericTarget { } return marshal; } + + // Flatten a RecordType::TypeList containing more record types or array types + static std::optional> + flattenTypeList(const RecordType::TypeList &types) { + std::vector flatTypes; + // The flat list will be at least the same size as the non-flat list. + flatTypes.reserve(types.size()); + for (auto [c, type] : types) { + // Flatten record type + if (auto recTy = mlir::dyn_cast(type)) { + auto subTypeList = flattenTypeList(recTy.getTypeList()); + if (!subTypeList) + return std::nullopt; + llvm::copy(*subTypeList, std::back_inserter(flatTypes)); + continue; + } + + // Flatten array type + if (auto seqTy = mlir::dyn_cast(type)) { + if (seqTy.hasDynamicExtents()) + return std::nullopt; + std::size_t n = seqTy.getConstantArraySize(); + auto eleTy = seqTy.getElementType(); + // Flatten array of record types + if (auto recTy = mlir::dyn_cast(eleTy)) { + auto subTypeList = flattenTypeList(recTy.getTypeList()); + if (!subTypeList) + return std::nullopt; + for (std::size_t i = 0; i < n; ++i) + llvm::copy(*subTypeList, std::back_inserter(flatTypes)); + } else { + std::fill_n(std::back_inserter(flatTypes), + seqTy.getConstantArraySize(), eleTy); + } + continue; + } + + // Other types are already flat + flatTypes.push_back(type); + } + return flatTypes; + } + + // Determine if the type is a Homogenous Floating-point Aggregate (HFA). An + // HFA is a record type with up to 4 floating-point members of the same type. 
+ static bool isHFA(fir::RecordType ty) { + RecordType::TypeList types = ty.getTypeList(); + if (types.empty() || types.size() > 4) + return false; + + std::optional> flatTypes = flattenTypeList(types); + if (!flatTypes || flatTypes->size() > 4) { + return false; + } + + if (!isa_real(flatTypes->front())) { + return false; + } + + return llvm::all_equal(*flatTypes); + } + + // AArch64 procedure call ABI: + // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#parameter-passing + CodeGenSpecifics::Marshalling + structReturnType(mlir::Location loc, fir::RecordType ty) const override { + CodeGenSpecifics::Marshalling marshal; + + if (isHFA(ty)) { + // Just return the existing record type + marshal.emplace_back(ty, AT{}); + return marshal; + } + + auto [size, align] = + fir::getTypeSizeAndAlignmentOrCrash(loc, ty, getDataLayout(), kindMap); + + // return in registers if size <= 16 bytes + if (size <= 16) { + std::size_t dwordSize = (size + 7) / 8; + auto newTy = fir::SequenceType::get( + dwordSize, mlir::IntegerType::get(ty.getContext(), 64)); + marshal.emplace_back(newTy, AT{}); + return marshal; + } + + unsigned short stackAlign = std::max(align, 8u); + marshal.emplace_back(fir::ReferenceType::get(ty), + AT{stackAlign, false, true}); + return marshal; + } }; } // namespace diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 225c585a02d91..6e130a96eb8dd 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -126,9 +126,9 @@ static bool mustParallelizeOp(Operation *op) { // omp.workshare.loop_wrapper {} // // Therefore, we skip if we encounter a nested omp.workshare. 
- if (isa(op)) + if (isa(nested)) return WalkResult::skip(); - if (isa(op)) + if (isa(nested)) return WalkResult::interrupt(); return WalkResult::advance(); }) @@ -253,8 +253,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, // Either we have already remapped it bool remapped = rootMapping.contains(opr); // Or it is available because it dominates `sr` - bool dominates = - di.properlyDominates(opr.getDefiningOp(), &*sr.begin); + bool dominates = di.properlyDominates(opr, &*sr.begin); return remapped || dominates; })) { // Safe to parallelize operations which have all operands available in @@ -405,7 +404,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, if (sourceRegion.hasOneBlock()) { handleOneBlock(sourceRegion.front()); - } else { + } else if (!sourceRegion.empty()) { auto &domTree = di.getDomTree(&sourceRegion); for (auto node : llvm::breadth_first(domTree.getRootNode())) { handleOneBlock(*node->getBlock()); diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index f1ebd08967b9a..5056c48c91cfa 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -155,8 +155,12 @@ static mlir::LogicalResult convertOpToCall(OpTy op, auto fTy = func.getFunctionType(); mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); + mlir::Value sourceLine; + if constexpr (std::is_same_v) + sourceLine = fir::factory::locationToLineNo( + builder, loc, op.getSource() ? fTy.getInput(6) : fTy.getInput(5)); + else + sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); mlir::Value hasStat = op.getHasStat() ? 
builder.createBool(loc, true) : builder.createBool(loc, false); @@ -168,8 +172,30 @@ static mlir::LogicalResult convertOpToCall(OpTy op, mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType()); errmsg = builder.create(loc, boxNoneTy).getResult(); } - llvm::SmallVector args{fir::runtime::createArguments( - builder, loc, fTy, op.getBox(), hasStat, errmsg, sourceFile, sourceLine)}; + llvm::SmallVector args; + if constexpr (std::is_same_v) { + if (op.getSource()) { + mlir::Value stream = + op.getStream() + ? op.getStream() + : builder.createIntegerConstant(loc, fTy.getInput(2), -1); + args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), + op.getSource(), stream, hasStat, + errmsg, sourceFile, sourceLine); + } else { + mlir::Value stream = + op.getStream() + ? op.getStream() + : builder.createIntegerConstant(loc, fTy.getInput(1), -1); + args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), + stream, hasStat, errmsg, sourceFile, + sourceLine); + } + } else { + args = + fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat, + errmsg, sourceFile, sourceLine); + } auto callOp = builder.create(loc, func, args); rewriter.replaceOp(op, callOp); return mlir::success(); @@ -182,14 +208,6 @@ struct CUFAllocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::AllocateOp op, mlir::PatternRewriter &rewriter) const override { - // TODO: Allocation with source will need a new entry point in the runtime. - if (op.getSource()) - return mlir::failure(); - - // TODO: Allocation using different stream. - if (op.getStream()) - return mlir::failure(); - // TODO: Pinned is a reference to a logical value that can be set to true // when pinned allocation succeed. This will require a new entry point. if (op.getPinned()) @@ -202,18 +220,26 @@ struct CUFAllocateOpConversion if (hasDoubleDescriptors(op)) { // Allocation for module variable are done with custom runtime entry point // so the descriptors can be synchronized. 
- mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc( - loc, builder); - return convertOpToCall(op, rewriter, func); + mlir::func::FuncOp func; + if (op.getSource()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else + func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); } - // Allocation for local descriptor falls back on the standard runtime - // AllocatableAllocate as the dedicated allocator is set in the descriptor - // before the call. - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc(loc, - builder); + mlir::func::FuncOp func; + if (op.getSource()) + func = + fir::runtime::getRuntimeFunc( + loc, builder); + else + func = fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); } }; @@ -236,7 +262,7 @@ struct CUFDeallocateOpConversion mlir::func::FuncOp func = fir::runtime::getRuntimeFunc( loc, builder); - return convertOpToCall(op, rewriter, func); + return convertOpToCall(op, rewriter, func); } // Deallocation for local descriptor falls back on the standard runtime diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index ceae20270d13d..2040a3e7ed5ae 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -23,137 +23,36 @@ namespace Fortran::parser { constexpr auto startOmpLine = skipStuffBeforeStatement >> "!$OMP "_sptok; constexpr auto endOmpLine = space >> endOfLine; -// Helper class to deal with a list of modifiers of various types. -// The list (to be parsed) is assumed to start with all modifiers of the -// first type, followed by a list of modifiers of the second type, etc. -// Each list can be empty, e.g. -// mod_of_kind_2, mod_of_kind_3, mod_of_kind_5, ... -// The result type is a tuple of optional lists of each modifier type. 
-template -struct ConcatSeparated { - template - using OptListOf = std::optional>; - template using TupleFor = std::tuple>; - - using resultType = std::tuple, OptListOf...>; - - constexpr ConcatSeparated(ConcatSeparated &&) = default; - constexpr ConcatSeparated(const ConcatSeparated &) = default; - constexpr ConcatSeparated(Separator sep, Parser p, Parsers... ps) - : parser_(p), sepAndParsers_(sep, ps...) {} +template struct ModifierList { + constexpr ModifierList(Separator sep) : sep_(sep) {} + constexpr ModifierList(const ModifierList &) = default; + constexpr ModifierList(ModifierList &&) = default; - std::optional Parse(ParseState &state) const { - // firstParser is a list parser, it returns optional. - auto firstParser = - attempt(nonemptySeparated(parser_, std::get<0>(sepAndParsers_))); - - if constexpr (sizeof...(Parsers) == 0) { - return TupleFor{std::move(firstParser.Parse(state))}; - } else { - using restParserType = ConcatSeparated; - auto restParser = std::make_from_tuple(sepAndParsers_); - - if (auto first{firstParser.Parse(state)}) { - if (attempt(std::get<0>(sepAndParsers_)).Parse(state)) { - return std::tuple_cat(TupleFor(std::move(*first)), - std::move(*restParser.Parse(state))); - } - return std::tuple_cat(TupleFor{std::move(*first)}, - std::tuple...>{}); - } - return std::tuple_cat( - TupleFor{}, std::move(*restParser.Parse(state))); - } - } - -private: - const Parser parser_; - const std::tuple sepAndParsers_; -}; - -// Map modifiers come from four categories: -// - map-type-modifier, -// - mapper (not parsed yet), -// - iterator, -// - map-type. -// There can be zero or more map-type-modifiers, and zero or one modifier -// of every other kind. -// Syntax-wise they look like a single list, where the last element could -// be a map-type, and all elements in that list are comma-separated[1]. -// Only if there was at least one modifier (of any kind) specified, the -// list must end with ":". 
-// There are complications coming from the fact that the comma separating the -// two kinds of modifiers is only allowed if there is at least one modifier of -// each kind. The MapModifiers parser utilizes the ConcatSeparated parser, which -// takes care of that. ConcatSeparated returns a tuple with optional lists of -// modifiers for every type. -// [1] Any of the commas are optional, but that syntax has been deprecated -// in OpenMP 5.2, and the parsing code keeps a record of whether the commas -// were present. -template struct MapModifiers { - constexpr MapModifiers(Separator sep) : sep_(sep) {} - constexpr MapModifiers(const MapModifiers &) = default; - constexpr MapModifiers(MapModifiers &&) = default; - - // Parsing of mappers is not supported yet. - using TypeModParser = Parser; - using IterParser = Parser; - using TypeParser = Parser; - using ModParser = - ConcatSeparated; - - using resultType = typename ModParser::resultType; + using resultType = std::list; std::optional Parse(ParseState &state) const { - auto mp = ModParser(sep_, TypeModParser{}, IterParser{}, TypeParser{}); - auto mods = mp.Parse(state); - // The ModParser always "succeeds", i.e. even if the input is junk, it - // will return a tuple filled with nullopts. If any of the components - // is not a nullopt, expect a ":". - if (std::apply([](auto &&...opts) { return (... || !!opts); }, *mods)) { + auto listp{nonemptySeparated(Parser{}, sep_)}; + if (auto result{attempt(listp).Parse(state)}) { if (!attempt(":"_tok).Parse(state)) { return std::nullopt; } + return std::move(result); } - return std::move(mods); + return resultType{}; } private: const Separator sep_; }; -// This is almost exactly the same thing as MapModifiers. It has the same -// issue (it expects modifiers in a specific order), and the fix for that -// will change how modifiers are parsed. Instead of making this code more -// generic, make it simple, and generalize after the fix is in place. 
-template struct MotionModifiers { - constexpr MotionModifiers(Separator sep) : sep_(sep) {} - constexpr MotionModifiers(const MotionModifiers &) = default; - constexpr MotionModifiers(MotionModifiers &&) = default; - - using ExpParser = Parser; - using IterParser = Parser; - using ModParser = ConcatSeparated; - - using resultType = typename ModParser::resultType; - - std::optional Parse(ParseState &state) const { - auto mp{ModParser(sep_, ExpParser{}, IterParser{})}; - auto mods{mp.Parse(state)}; - // The ModParser always "succeeds", i.e. even if the input is junk, it - // will return a tuple filled with nullopts. If any of the components - // is not a nullopt, expect a ":". - if (std::apply([](auto &&...opts) { return (... || !!opts); }, *mods)) { - if (!attempt(":"_tok).Parse(state)) { - return std::nullopt; - } - } - return std::move(mods); - } - -private: - const Separator sep_; -}; +// Use a function to create ModifierList because functions allow "partial" +// template argument deduction: "modifierList(sep)" would be legal, +// while "ModifierList(sep)" would complain about a missing template +// argument "Separator". +template +constexpr ModifierList modifierList(Separator sep) { + return ModifierList(sep); +} // OpenMP Clauses @@ -192,6 +91,16 @@ static TypeDeclarationStmt makeIterSpecDecl(std::list &&names) { // --- Parsers for clause modifiers ----------------------------------- +TYPE_PARSER(construct( // + "SIMD" >> pure(OmpChunkModifier::Value::Simd))) + +TYPE_PARSER(construct( + "SINK" >> pure(OmpDependenceType::Value::Sink) || + "SOURCE" >> pure(OmpDependenceType::Value::Source))) + +TYPE_PARSER(construct( // + "PRESENT" >> pure(OmpExpectation::Value::Present))) + TYPE_PARSER(construct( // Using Parser or Parser has the problem // that they will attempt to treat what follows the '=' as initialization. 
@@ -208,12 +117,9 @@ TYPE_PARSER(construct( makeIterSpecDecl, nonemptyList(Parser{}) / "="_tok)), subscriptTriplet)) -TYPE_PARSER(construct( - "SINK" >> pure(OmpDependenceType::Value::Sink) || - "SOURCE" >> pure(OmpDependenceType::Value::Source))) - // [5.0] 2.1.6 iterator -> iterator-specifier-list -TYPE_PARSER(construct("ITERATOR" >> +TYPE_PARSER(construct( // + "ITERATOR" >> parenthesized(nonemptyList(sourced(Parser{}))))) // 2.15.3.7 LINEAR (linear-list: linear-step) @@ -224,13 +130,29 @@ TYPE_PARSER(construct( // "VAL" >> pure(OmpLinearModifier::Value::Val) || "UVAL" >> pure(OmpLinearModifier::Value::Uval))) +TYPE_PARSER(construct( // + "MAPPER"_tok >> parenthesized(Parser{}))) + +// map-type -> ALLOC | DELETE | FROM | RELEASE | TO | TOFROM +TYPE_PARSER(construct( // + "ALLOC" >> pure(OmpMapType::Value::Alloc) || + "DELETE" >> pure(OmpMapType::Value::Delete) || + "FROM" >> pure(OmpMapType::Value::From) || + "RELEASE" >> pure(OmpMapType::Value::Release) || + "TO"_id >> pure(OmpMapType::Value::To) || + "TOFROM" >> pure(OmpMapType::Value::Tofrom))) + +// map-type-modifier -> ALWAYS | CLOSE | OMPX_HOLD | PRESENT +TYPE_PARSER(construct( + "ALWAYS" >> pure(OmpMapTypeModifier::Value::Always) || + "CLOSE" >> pure(OmpMapTypeModifier::Value::Close) || + "OMPX_HOLD" >> pure(OmpMapTypeModifier::Value::Ompx_Hold) || + "PRESENT" >> pure(OmpMapTypeModifier::Value::Present))) + // 2.15.3.6 REDUCTION (reduction-identifier: variable-name-list) TYPE_PARSER(construct(Parser{}) || construct(Parser{})) -TYPE_PARSER(construct( // - "SIMD" >> pure(OmpChunkModifier::Value::Simd))) - TYPE_PARSER(construct( "REPRODUCIBLE" >> pure(OmpOrderModifier::Value::Reproducible) || "UNCONSTRAINED" >> pure(OmpOrderModifier::Value::Unconstrained))) @@ -261,6 +183,17 @@ TYPE_PARSER(construct( "SCALAR" >> pure(OmpVariableCategory::Value::Scalar))) // This could be auto-generated. 
+TYPE_PARSER(sourced(construct( + sourced(construct(Parser{}) || + construct(Parser{}) || + construct(Parser{}))))) + +TYPE_PARSER(sourced(construct( + sourced(construct(Parser{}) || + construct(Parser{}) || + construct(Parser{}) || + construct(Parser{}))))) + TYPE_PARSER( sourced(construct(Parser{}))) @@ -273,11 +206,33 @@ TYPE_PARSER(sourced(construct(sourced( construct(Parser{}) || construct(Parser{}))))) +TYPE_PARSER(sourced(construct( + sourced(construct(Parser{}) || + construct(Parser{}) || + construct(Parser{}))))) + TYPE_PARSER(sourced( construct(Parser{}))) // --- Parsers for clauses -------------------------------------------- +/// `MOBClause` is a clause that has a +/// std::tuple. +/// Helper function to create a typical modifiers-objects clause, where the +/// commas separating individual modifiers are optional, and the clause +/// contains a bool member to indicate whether it was fully comma-separated +/// or not. +template +static inline MOBClause makeMobClause( + std::list &&mods, OmpObjectList &&objs) { + if (!mods.empty()) { + return MOBClause{std::move(mods), std::move(objs), CommaSeparated}; + } else { + using ListTy = std::list; + return MOBClause{std::optional{}, std::move(objs), CommaSeparated}; + } +} + // [5.0] 2.10.1 affinity([aff-modifier:] locator-list) // aff-modifier: interator-modifier TYPE_PARSER(construct( @@ -290,53 +245,18 @@ TYPE_PARSER(construct( "SHARED" >> pure(OmpDefaultClause::Type::Shared) || "NONE" >> pure(OmpDefaultClause::Type::None))) -// 2.5 PROC_BIND (MASTER | CLOSE | PRIMARY | SPREAD ) +// 2.5 PROC_BIND (MASTER | CLOSE | PRIMARY | SPREAD) TYPE_PARSER(construct( "CLOSE" >> pure(OmpProcBindClause::Type::Close) || "MASTER" >> pure(OmpProcBindClause::Type::Master) || "PRIMARY" >> pure(OmpProcBindClause::Type::Primary) || "SPREAD" >> pure(OmpProcBindClause::Type::Spread))) -// 2.15.5.1 map -> -// MAP ([ [map-type-modifiers [,] ] map-type : ] variable-name-list) -// map-type-modifiers -> map-type-modifier [,] [...] 
-// map-type-modifier -> ALWAYS | CLOSE | OMPX_HOLD | PRESENT -// map-type -> ALLOC | DELETE | FROM | RELEASE | TO | TOFROM -TYPE_PARSER(construct( - "ALWAYS" >> pure(OmpMapClause::TypeModifier::Always) || - "CLOSE" >> pure(OmpMapClause::TypeModifier::Close) || - "OMPX_HOLD" >> pure(OmpMapClause::TypeModifier::Ompx_Hold) || - "PRESENT" >> pure(OmpMapClause::TypeModifier::Present))) - -TYPE_PARSER( - construct("ALLOC" >> pure(OmpMapClause::Type::Alloc) || - "DELETE" >> pure(OmpMapClause::Type::Delete) || - "FROM" >> pure(OmpMapClause::Type::From) || - "RELEASE" >> pure(OmpMapClause::Type::Release) || - "TO"_id >> pure(OmpMapClause::Type::To) || - "TOFROM" >> pure(OmpMapClause::Type::Tofrom))) - -template -static inline OmpMapClause makeMapClause(OmpMapperIdentifier &&mm, - std::tuple>, - std::optional>, - std::optional>> &&mods, - OmpObjectList &&objs) { - auto &&[tm, it, ty] = std::move(mods); - return OmpMapClause{std::move(mm), std::move(tm), std::move(it), - std::move(ty), std::move(objs), CommasEverywhere}; -} - -TYPE_PARSER(construct( - maybe("MAPPER"_tok >> parenthesized(name) / ","_tok))) - TYPE_PARSER(construct( - applyFunction(makeMapClause, - Parser{}, MapModifiers(","_tok), - Parser{}) || - applyFunction(makeMapClause, - Parser{}, MapModifiers(maybe(","_tok)), - Parser{}))) + applyFunction(makeMobClause, + modifierList(","_tok), Parser{}) || + applyFunction(makeMobClause, + modifierList(maybe(","_tok)), Parser{}))) // [OpenMP 5.0] // 2.19.7.2 defaultmap(implicit-behavior[:variable-category]) @@ -463,30 +383,17 @@ TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, TYPE_CONTEXT_PARSER("Omp Doacross clause"_en_US, construct(Parser{})) -TYPE_PARSER(construct( - "PRESENT" >> pure(OmpFromClause::Expectation::Present))) - -template -static inline MotionClause makeMotionClause( - std::tuple>, - std::optional>> &&mods, - OmpObjectList &&objs) { - auto &&[exp, iter] = std::move(mods); - return MotionClause( - std::move(exp), std::move(iter), std::move(objs), 
CommasEverywhere); -} - TYPE_PARSER(construct( - applyFunction(makeMotionClause, - MotionModifiers(","_tok), Parser{}) || - applyFunction(makeMotionClause, - MotionModifiers(maybe(","_tok)), Parser{}))) + applyFunction(makeMobClause, + modifierList(","_tok), Parser{}) || + applyFunction(makeMobClause, + modifierList(maybe(","_tok)), Parser{}))) TYPE_PARSER(construct( - applyFunction(makeMotionClause, - MotionModifiers(","_tok), Parser{}) || - applyFunction(makeMotionClause, - MotionModifiers(maybe(","_tok)), Parser{}))) + applyFunction(makeMobClause, + modifierList(","_tok), Parser{}) || + applyFunction(makeMobClause, + modifierList(maybe(","_tok)), Parser{}))) TYPE_CONTEXT_PARSER("Omp LINEAR clause"_en_US, construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4881da848c347..fe3f6ce7aa629 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2084,6 +2084,11 @@ class UnparseVisitor { Walk(x.v); Put(")"); } + void Unparse(const OmpMapper &x) { + Word("MAPPER("); + Walk(x.v); + Put(")"); + } void Unparse(const OmpLastprivateClause &x) { Walk( std::get>(x.t), @@ -2091,46 +2096,8 @@ class UnparseVisitor { Walk(std::get(x.t)); } void Unparse(const OmpMapClause &x) { - auto &typeMod = - std::get>>(x.t); - auto &iter = std::get>>(x.t); - auto &type = std::get>>(x.t); - auto &mapper = std::get(x.t); - - // For a given list of items, if the item has a value, then walk it. - // Print commas between items that have values. - // Return 'true' if something did get printed, otherwise 'false'. 
- bool needComma{false}; - if (mapper.v) { - Word("MAPPER("); - Walk(*mapper.v); - Put(")"); - needComma = true; - } - if (typeMod) { - if (needComma) { - Put(", "); - } - Walk(*typeMod); - needComma = true; - } - if (iter) { - if (needComma) { - Put(", "); - } - Walk(*iter); - needComma = true; - } - if (type) { - if (needComma) { - Put(", "); - } - Walk(*type); - needComma = true; - } - if (needComma) { - Put(": "); - } + using Modifier = OmpMapClause::Modifier; + Walk(std::get>>(x.t), ": "); Walk(std::get(x.t)); } void Unparse(const OmpScheduleClause &x) { @@ -2153,24 +2120,8 @@ class UnparseVisitor { Walk(std::get>(x.t)); } void Unparse(const OmpFromClause &x) { - auto &expect{ - std::get>>(x.t)}; - auto &iter{std::get>>(x.t)}; - bool needComma{false}; - if (expect) { - Walk(*expect); - needComma = true; - } - if (iter) { - if (needComma) { - Put(", "); - } - Walk(*iter); - needComma = true; - } - if (needComma) { - Put(": "); - } + using Modifier = OmpFromClause::Modifier; + Walk(std::get>>(x.t), ": "); Walk(std::get(x.t)); } void Unparse(const OmpIfClause &x) { @@ -2257,24 +2208,8 @@ class UnparseVisitor { Walk(":", std::get>>(x.t)); } void Unparse(const OmpToClause &x) { - auto &expect{ - std::get>>(x.t)}; - auto &iter{std::get>>(x.t)}; - bool needComma{false}; - if (expect) { - Walk(*expect); - needComma = true; - } - if (iter) { - if (needComma) { - Put(", "); - } - Walk(*iter); - needComma = true; - } - if (needComma) { - Put(": "); - } + using Modifier = OmpToClause::Modifier; + Walk(std::get>>(x.t), ": "); Walk(std::get(x.t)); } #define GEN_FLANG_CLAUSE_UNPARSE @@ -2913,7 +2848,7 @@ class UnparseVisitor { WALK_NESTED_ENUM(OmpDeviceClause, DeviceModifier) // OMP device modifier WALK_NESTED_ENUM(OmpDeviceTypeClause, Type) // OMP DEVICE_TYPE WALK_NESTED_ENUM(OmpReductionModifier, Value) // OMP reduction-modifier - WALK_NESTED_ENUM(OmpFromClause, Expectation) // OMP motion-expectation + WALK_NESTED_ENUM(OmpExpectation, Value) // OMP motion-expectation 
WALK_NESTED_ENUM(OmpIfClause, DirectiveNameModifier) // OMP directive-modifier WALK_NESTED_ENUM(OmpCancelType, Type) // OMP cancel-type WALK_NESTED_ENUM(OmpOrderClause, Ordering) // OMP ordering @@ -2921,8 +2856,8 @@ class UnparseVisitor { WALK_NESTED_ENUM( OmpGrainsizeClause, Prescriptiveness) // OMP grainsize-modifier WALK_NESTED_ENUM(OmpNumTasksClause, Prescriptiveness) // OMP numtasks-modifier - WALK_NESTED_ENUM(OmpMapClause, Type) // OMP map-type - WALK_NESTED_ENUM(OmpMapClause, TypeModifier) // OMP map-type-modifier + WALK_NESTED_ENUM(OmpMapType, Value) // OMP map-type + WALK_NESTED_ENUM(OmpMapTypeModifier, Value) // OMP map-type-modifier #undef WALK_NESTED_ENUM void Unparse(const ReductionOperator::Operator x) { switch (x) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 832c9f2c7174d..3733ebfaf9492 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2860,7 +2860,8 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Destroy &x) { void OmpStructureChecker::Enter(const parser::OmpClause::Reduction &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_reduction); - if (OmpVerifyModifiers(x.v, GetContext().clauseSource, context_)) { + if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_reduction, + GetContext().clauseSource, context_)) { if (CheckReductionOperators(x)) { CheckReductionTypeList(x); } @@ -2875,45 +2876,41 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Reduction &x) { bool OmpStructureChecker::CheckReductionOperators( const parser::OmpClause::Reduction &x) { + bool ok = false; auto &modifiers{OmpGetModifiers(x.v)}; - const auto *definedOp{ - OmpGetUniqueModifier(modifiers)}; - if (!definedOp) { - return false; + if (const auto *ident{ + OmpGetUniqueModifier(modifiers)}) { + + auto visitOperator{[&](const parser::DefinedOperator &dOpr) { + if (const auto *intrinsicOp{ + std::get_if( + &dOpr.u)}) { + ok = 
CheckIntrinsicOperator(*intrinsicOp); + } else { + context_.Say(GetContext().clauseSource, + "Invalid reduction operator in REDUCTION clause."_err_en_US, + ContextDirectiveAsFortran()); + } + }}; + + auto visitDesignator{[&](const parser::ProcedureDesignator &procD) { + const parser::Name *name{std::get_if(&procD.u)}; + if (name && name->symbol) { + const SourceName &realName{name->symbol->GetUltimate().name()}; + if (realName == "max" || realName == "min" || realName == "iand" || + realName == "ior" || realName == "ieor") { + ok = true; + } + } + if (!ok) { + context_.Say(GetContext().clauseSource, + "Invalid reduction identifier in REDUCTION " + "clause."_err_en_US, + ContextDirectiveAsFortran()); + } + }}; + common::visit(common::visitors{visitOperator, visitDesignator}, ident->u); } - bool ok = false; - common::visit( - common::visitors{ - [&](const parser::DefinedOperator &dOpr) { - if (const auto *intrinsicOp{ - std::get_if( - &dOpr.u)}) { - ok = CheckIntrinsicOperator(*intrinsicOp); - } else { - context_.Say(GetContext().clauseSource, - "Invalid reduction operator in REDUCTION clause."_err_en_US, - ContextDirectiveAsFortran()); - } - }, - [&](const parser::ProcedureDesignator &procD) { - const parser::Name *name{std::get_if(&procD.u)}; - if (name && name->symbol) { - const SourceName &realName{name->symbol->GetUltimate().name()}; - if (realName == "max" || realName == "min" || - realName == "iand" || realName == "ior" || - realName == "ieor") { - ok = true; - } - } - if (!ok) { - context_.Say(GetContext().clauseSource, - "Invalid reduction identifier in REDUCTION " - "clause."_err_en_US, - ContextDirectiveAsFortran()); - } - }, - }, - definedOp->u); return ok; } @@ -3405,7 +3402,8 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Defaultmap &x) { ThisVersion(version), TryVersion(50)); } } - if (!OmpVerifyModifiers(x.v, GetContext().clauseSource, context_)) { + if (!OmpVerifyModifiers(x.v, llvm::omp::OMPC_defaultmap, + GetContext().clauseSource, 
context_)) { // If modifier verification fails, return early. return; } @@ -3483,15 +3481,15 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Linear &x) { } void OmpStructureChecker::CheckAllowedMapTypes( - const parser::OmpMapClause::Type &type, - const std::list &allowedMapTypeList) { + const parser::OmpMapType::Value &type, + const std::list &allowedMapTypeList) { if (!llvm::is_contained(allowedMapTypeList, type)) { std::string commaSeparatedMapTypes; llvm::interleave( allowedMapTypeList.begin(), allowedMapTypeList.end(), - [&](const parser::OmpMapClause::Type &mapType) { + [&](const parser::OmpMapType::Value &mapType) { commaSeparatedMapTypes.append(parser::ToUpperCaseLetters( - parser::OmpMapClause::EnumToString(mapType))); + parser::OmpMapType::EnumToString(mapType))); }, [&] { commaSeparatedMapTypes.append(", "); }); context_.Say(GetContext().clauseSource, @@ -3503,40 +3501,23 @@ void OmpStructureChecker::CheckAllowedMapTypes( void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_map); - using TypeMod = parser::OmpMapClause::TypeModifier; - using Type = parser::OmpMapClause::Type; - using IterMod = parser::OmpIterator; + if (!OmpVerifyModifiers( + x.v, llvm::omp::OMPC_map, GetContext().clauseSource, context_)) { + return; + } + auto &modifiers{OmpGetModifiers(x.v)}; unsigned version{context_.langOptions().OpenMPVersion}; if (auto commas{std::get(x.v.t)}; !commas && version >= 52) { context_.Say(GetContext().clauseSource, "The specification of modifiers without comma separators for the " "'MAP' clause has been deprecated in OpenMP 5.2"_port_en_US); } - if (auto &mapTypeMod{std::get>>(x.v.t)}) { - if (auto *dup{FindDuplicateEntry(*mapTypeMod)}) { - context_.Say(GetContext().clauseSource, - "Duplicate map-type-modifier entry '%s' will be ignored"_warn_en_US, - parser::ToUpperCaseLetters(parser::OmpMapClause::EnumToString(*dup))); - } - } - // The size of any of the optional lists is never 0, 
instead of the list - // being empty, it will be a nullopt. - if (auto &iterMod{std::get>>(x.v.t)}) { - if (iterMod->size() != 1) { - context_.Say(GetContext().clauseSource, - "Only one iterator-modifier is allowed"_err_en_US); - } - CheckIteratorModifier(iterMod->front()); + if (auto *iter{OmpGetUniqueModifier(modifiers)}) { + CheckIteratorModifier(*iter); } - if (auto &mapType{std::get>>(x.v.t)}) { - if (mapType->size() != 1) { - context_.Say(GetContext().clauseSource, - "Multiple map types are not allowed"_err_en_US); - return; - } - parser::OmpMapClause::Type type{mapType->front()}; - + if (auto *type{OmpGetUniqueModifier(modifiers)}) { + using Value = parser::OmpMapType::Value; switch (GetContext().directive) { case llvm::omp::Directive::OMPD_target: case llvm::omp::Directive::OMPD_target_teams: @@ -3546,25 +3527,43 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { case llvm::omp::Directive::OMPD_target_teams_distribute_parallel_do_simd: case llvm::omp::Directive::OMPD_target_data: CheckAllowedMapTypes( - type, {Type::To, Type::From, Type::Tofrom, Type::Alloc}); + type->v, {Value::To, Value::From, Value::Tofrom, Value::Alloc}); break; case llvm::omp::Directive::OMPD_target_enter_data: - CheckAllowedMapTypes(type, {Type::To, Type::Alloc}); + CheckAllowedMapTypes(type->v, {Value::To, Value::Alloc}); break; case llvm::omp::Directive::OMPD_target_exit_data: - CheckAllowedMapTypes(type, {Type::From, Type::Release, Type::Delete}); + CheckAllowedMapTypes( + type->v, {Value::From, Value::Release, Value::Delete}); break; default: break; } } + + auto &&typeMods{ + OmpGetRepeatableModifier(modifiers)}; + struct Less { + using Iterator = decltype(typeMods.begin()); + bool operator()(Iterator a, Iterator b) const { + const parser::OmpMapTypeModifier *pa = *a; + const parser::OmpMapTypeModifier *pb = *b; + return pa->v < pb->v; + } + }; + if (auto maybeIter{FindDuplicate(typeMods)}) { + context_.Say(GetContext().clauseSource, + "Duplicate 
map-type-modifier entry '%s' will be ignored"_warn_en_US, + parser::ToUpperCaseLetters( + parser::OmpMapTypeModifier::EnumToString((**maybeIter)->v))); + } } void OmpStructureChecker::Enter(const parser::OmpClause::Schedule &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_schedule); const parser::OmpScheduleClause &scheduleClause = x.v; - if (!OmpVerifyModifiers( - scheduleClause, GetContext().clauseSource, context_)) { + if (!OmpVerifyModifiers(scheduleClause, llvm::omp::OMPC_schedule, + GetContext().clauseSource, context_)) { return; } @@ -3734,8 +3733,8 @@ void OmpStructureChecker::CheckDoacross(const parser::OmpDoacross &doa) { // Check if the variables in the iteration vector are unique. struct Less { - bool operator()( - const parser::OmpIteration *a, const parser::OmpIteration *b) const { + using Iterator = std::list::const_iterator; + bool operator()(Iterator a, Iterator b) const { auto namea{std::get(a->t)}; auto nameb{std::get(b->t)}; assert(namea.symbol && nameb.symbol && "Unresolved symbols"); @@ -3745,8 +3744,8 @@ void OmpStructureChecker::CheckDoacross(const parser::OmpDoacross &doa) { reinterpret_cast(nameb.symbol); } }; - if (auto *duplicate{FindDuplicateEntry(vec)}) { - auto name{std::get(duplicate->t)}; + if (auto maybeIter{FindDuplicate(vec)}) { + auto name{std::get((*maybeIter)->t)}; context_.Say(name.source, "Duplicate variable '%s' in the iteration vector"_err_en_US, name.ToString()); @@ -4069,35 +4068,16 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Enter &x) { void OmpStructureChecker::Enter(const parser::OmpClause::From &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_from); - unsigned version{context_.langOptions().OpenMPVersion}; - using ExpMod = parser::OmpFromClause::Expectation; - using IterMod = parser::OmpIterator; - - if (auto &expMod{std::get>>(x.v.t)}) { - unsigned allowedInVersion{51}; - if (version < allowedInVersion) { - context_.Say(GetContext().clauseSource, - "The PRESENT modifier is not supported in %s, 
%s"_warn_en_US, - ThisVersion(version), TryVersion(allowedInVersion)); - } - if (expMod->size() != 1) { - context_.Say(GetContext().clauseSource, - "Only one PRESENT modifier is allowed"_err_en_US); - } + if (!OmpVerifyModifiers( + x.v, llvm::omp::OMPC_from, GetContext().clauseSource, context_)) { + return; } - if (auto &iterMod{std::get>>(x.v.t)}) { - unsigned allowedInVersion{51}; - if (version < allowedInVersion) { - context_.Say(GetContext().clauseSource, - "Iterator modifiers are not supported in %s, %s"_warn_en_US, - ThisVersion(version), TryVersion(allowedInVersion)); - } - if (iterMod->size() != 1) { - context_.Say(GetContext().clauseSource, - "Only one iterator-modifier is allowed"_err_en_US); - } - CheckIteratorModifier(iterMod->front()); + auto &modifiers{OmpGetModifiers(x.v)}; + unsigned version{context_.langOptions().OpenMPVersion}; + + if (auto *iter{OmpGetUniqueModifier(modifiers)}) { + CheckIteratorModifier(*iter); } const auto &objList{std::get(x.v.t)}; @@ -4121,6 +4101,12 @@ void OmpStructureChecker::Enter(const parser::OmpClause::From &x) { void OmpStructureChecker::Enter(const parser::OmpClause::To &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_to); + if (!OmpVerifyModifiers( + x.v, llvm::omp::OMPC_to, GetContext().clauseSource, context_)) { + return; + } + + auto &modifiers{OmpGetModifiers(x.v)}; unsigned version{context_.langOptions().OpenMPVersion}; // The "to" clause is only allowed on "declare target" (pre-5.1), and @@ -4133,35 +4119,10 @@ void OmpStructureChecker::Enter(const parser::OmpClause::To &x) { if (GetContext().directive == llvm::omp::OMPD_declare_target) { return; } - assert(GetContext().directive == llvm::omp::OMPD_target_update); - using ExpMod = parser::OmpFromClause::Expectation; - using IterMod = parser::OmpIterator; - if (auto &expMod{std::get>>(x.v.t)}) { - unsigned allowedInVersion{51}; - if (version < allowedInVersion) { - context_.Say(GetContext().clauseSource, - "The PRESENT modifier is not supported in %s, 
%s"_warn_en_US, - ThisVersion(version), TryVersion(allowedInVersion)); - } - if (expMod->size() != 1) { - context_.Say(GetContext().clauseSource, - "Only one PRESENT modifier is allowed"_err_en_US); - } - } - - if (auto &iterMod{std::get>>(x.v.t)}) { - unsigned allowedInVersion{51}; - if (version < allowedInVersion) { - context_.Say(GetContext().clauseSource, - "Iterator modifiers are not supported in %s, %s"_warn_en_US, - ThisVersion(version), TryVersion(allowedInVersion)); - } - if (iterMod->size() != 1) { - context_.Say(GetContext().clauseSource, - "Only one iterator-modifier is allowed"_err_en_US); - } - CheckIteratorModifier(iterMod->front()); + assert(GetContext().directive == llvm::omp::OMPD_target_update); + if (auto *iter{OmpGetUniqueModifier(modifiers)}) { + CheckIteratorModifier(*iter); } const auto &objList{std::get(x.v.t)}; diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index cd0a475937613..4ce52bebd5a73 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -161,16 +161,15 @@ class OmpStructureChecker void HasInvalidDistributeNesting(const parser::OpenMPLoopConstruct &x); void HasInvalidLoopBinding(const parser::OpenMPLoopConstruct &x); // specific clause related - void CheckAllowedMapTypes(const parser::OmpMapClause::Type &, - const std::list &); + void CheckAllowedMapTypes(const parser::OmpMapType::Value &, + const std::list &); llvm::StringRef getClauseName(llvm::omp::Clause clause) override; llvm::StringRef getDirectiveName(llvm::omp::Directive directive) override; - template struct DefaultLess { - bool operator()(const T *a, const T *b) const { return *a < *b; } - }; - template > - const T *FindDuplicateEntry(const std::list &); + template < // + typename LessTy, typename RangeTy, + typename IterTy = decltype(std::declval().begin())> + std::optional FindDuplicate(RangeTy &&); void CheckDependList(const parser::DataRef &); void 
CheckDependArraySection( @@ -274,22 +273,20 @@ class OmpStructureChecker std::vector loopStack_; }; -template -const T *OmpStructureChecker::FindDuplicateEntry(const std::list &list) { - // Add elements of the list to a set. If the insertion fails, return - // the address of the failing element. - - // The objects of type T may not be copyable, so add their addresses - // to the set. The set will need to compare the actual objects, so - // the custom comparator is provided. - std::set uniq; - - for (const T &item : list) { - if (!uniq.insert(&item).second) { - return &item; +/// Find a duplicate entry in the range, and return an iterator to it. +/// If there are no duplicate entries, return nullopt. +template +std::optional OmpStructureChecker::FindDuplicate(RangeTy &&range) { + // Deal with iterators, since the actual elements may be rvalues (i.e. + // have no addresses), for example with custom-constructed ranges that + // are not simple c.begin()..c.end(). + std::set uniq; + for (auto it{range.begin()}, end{range.end()}; it != end; ++it) { + if (!uniq.insert(it).second) { + return it; } } - return nullptr; + return std::nullopt; } } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/openmp-modifiers.cpp b/flang/lib/Semantics/openmp-modifiers.cpp index e0d73e605c73b..1fd2358aa594e 100644 --- a/flang/lib/Semantics/openmp-modifiers.cpp +++ b/flang/lib/Semantics/openmp-modifiers.cpp @@ -40,7 +40,13 @@ static unsigned findVersion( } } - assert(found != 0 && "cannot locate entry for version in map"); + // It can happen that the above search will not find any version, for + // example when the minimum version in the map is higher than the current + // version. This is really an error, but this situation should be handled + // gracefully, so make some sensible choice and return it. + if (found == 0) { + found = !map.empty() ? 
map.begin()->first : versions.front(); + } return found; } @@ -52,6 +58,19 @@ const OmpClauses &OmpModifierDescriptor::clauses(unsigned version) const { return clauses_.at(findVersion(version, clauses_)); } +unsigned OmpModifierDescriptor::since(llvm::omp::Clause id) const { + unsigned found{[&]() { + for (auto &[v, cs] : clauses_) { + if (cs.test(id)) { + return v; + } + } + return ~0u; + }()}; + + return found <= 45 ? 0 : found; +} + // Note: The intent for these functions is to have them be automatically- // generated in the future. @@ -89,6 +108,22 @@ const OmpModifierDescriptor &OmpGetDescriptor() { return desc; } +template <> +const OmpModifierDescriptor &OmpGetDescriptor() { + static const OmpModifierDescriptor desc{ + /*name=*/"expectation", + /*props=*/ + { + {51, {OmpProperty::Unique}}, + }, + /*clauses=*/ + { + {51, {Clause::OMPC_from, Clause::OMPC_to}}, + }, + }; + return desc; +} + template <> const OmpModifierDescriptor &OmpGetDescriptor() { static const OmpModifierDescriptor desc{ @@ -124,6 +159,54 @@ const OmpModifierDescriptor &OmpGetDescriptor() { return desc; } +template <> // +const OmpModifierDescriptor &OmpGetDescriptor() { + static const OmpModifierDescriptor desc{ + /*name=*/"mapper", + /*props=*/ + { + {50, {OmpProperty::Unique}}, + }, + /*clauses=*/ + { + {50, {Clause::OMPC_from, Clause::OMPC_map, Clause::OMPC_to}}, + }, + }; + return desc; +} + +template <> +const OmpModifierDescriptor &OmpGetDescriptor() { + static const OmpModifierDescriptor desc{ + /*name=*/"map-type", + /*props=*/ + { + {45, {OmpProperty::Ultimate}}, + }, + /*clauses=*/ + { + {45, {Clause::OMPC_map}}, + }, + }; + return desc; +} + +template <> +const OmpModifierDescriptor &OmpGetDescriptor() { + static const OmpModifierDescriptor desc{ + /*name=*/"map-type-modifier", + /*props=*/ + { + {45, {}}, // Repeatable + }, + /*clauses=*/ + { + {45, {Clause::OMPC_map}}, + }, + }; + return desc; +} + template <> const OmpModifierDescriptor &OmpGetDescriptor() { static const 
OmpModifierDescriptor desc{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index c75808a8963b3..0c3708b3fd29b 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -522,49 +522,47 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const auto &objList{std::get(x.v.t)}; ResolveOmpObjectList(objList, Symbol::Flag::OmpReduction); - auto &modifiers{OmpGetModifiers(x.v)}; - if (!modifiers) { - return false; - } - - auto createDummyProcSymbol = [&](const parser::Name *name) { - // If name resolution failed, create a dummy symbol - const auto namePair{ - currScope().try_emplace(name->source, Attrs{}, ProcEntityDetails{})}; - auto &newSymbol{*namePair.first->second}; - if (context_.intrinsics().IsIntrinsic(name->ToString())) { - newSymbol.attrs().set(Attr::INTRINSIC); - } - name->symbol = &newSymbol; - }; + if (auto &modifiers{OmpGetModifiers(x.v)}) { + auto createDummyProcSymbol = [&](const parser::Name *name) { + // If name resolution failed, create a dummy symbol + const auto namePair{currScope().try_emplace( + name->source, Attrs{}, ProcEntityDetails{})}; + auto &newSymbol{*namePair.first->second}; + if (context_.intrinsics().IsIntrinsic(name->ToString())) { + newSymbol.attrs().set(Attr::INTRINSIC); + } + name->symbol = &newSymbol; + }; - for (auto &mod : *modifiers) { - if (!std::holds_alternative(mod.u)) { - continue; - } - auto &opr{std::get(mod.u)}; - if (auto *procD{parser::Unwrap(opr.u)}) { - if (auto *name{parser::Unwrap(procD->u)}) { - if (!name->symbol) { - if (!ResolveName(name)) { - createDummyProcSymbol(name); + for (auto &mod : *modifiers) { + if (!std::holds_alternative(mod.u)) { + continue; + } + auto &opr{std::get(mod.u)}; + if (auto *procD{parser::Unwrap(opr.u)}) { + if (auto *name{parser::Unwrap(procD->u)}) { + if (!name->symbol) { + if (!ResolveName(name)) { + createDummyProcSymbol(name); + } } } - } - if (auto 
*procRef{parser::Unwrap(procD->u)}) { - if (!procRef->v.thing.component.symbol) { - if (!ResolveName(&procRef->v.thing.component)) { - createDummyProcSymbol(&procRef->v.thing.component); + if (auto *procRef{ + parser::Unwrap(procD->u)}) { + if (!procRef->v.thing.component.symbol) { + if (!ResolveName(&procRef->v.thing.component)) { + createDummyProcSymbol(&procRef->v.thing.component); + } } } } } - } - using ReductionModifier = parser::OmpReductionModifier; - if (auto *maybeModifier{ - OmpGetUniqueModifier(modifiers)}) { - if (maybeModifier->v == ReductionModifier::Value::Inscan) { - ResolveOmpObjectList(objList, Symbol::Flag::OmpInScanReduction); + using ReductionModifier = parser::OmpReductionModifier; + if (auto *maybeModifier{ + OmpGetUniqueModifier(modifiers)}) { + if (maybeModifier->v == ReductionModifier::Value::Inscan) { + ResolveOmpObjectList(objList, Symbol::Flag::OmpInScanReduction); + } } } return false; @@ -643,28 +641,25 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void Post(const parser::OmpMapClause &x) { Symbol::Flag ompFlag = Symbol::Flag::OmpMapToFrom; - // There is only one `type' allowed, but it's parsed as a list. Multiple - // types are diagnosed in the semantic checks for OpenMP. 
- if (const auto &mapType{ - std::get>>( - x.t)}) { - switch (mapType->front()) { - case parser::OmpMapClause::Type::To: + auto &mods{OmpGetModifiers(x)}; + if (auto *mapType{OmpGetUniqueModifier(mods)}) { + switch (mapType->v) { + case parser::OmpMapType::Value::To: ompFlag = Symbol::Flag::OmpMapTo; break; - case parser::OmpMapClause::Type::From: + case parser::OmpMapType::Value::From: ompFlag = Symbol::Flag::OmpMapFrom; break; - case parser::OmpMapClause::Type::Tofrom: + case parser::OmpMapType::Value::Tofrom: ompFlag = Symbol::Flag::OmpMapToFrom; break; - case parser::OmpMapClause::Type::Alloc: + case parser::OmpMapType::Value::Alloc: ompFlag = Symbol::Flag::OmpMapAlloc; break; - case parser::OmpMapClause::Type::Release: + case parser::OmpMapType::Value::Release: ompFlag = Symbol::Flag::OmpMapRelease; break; - case parser::OmpMapClause::Type::Delete: + case parser::OmpMapType::Value::Delete: ompFlag = Symbol::Flag::OmpMapDelete; break; } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 929d35a4717dc..b576f59e8c7e5 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -31,6 +31,7 @@ #include "flang/Parser/tools.h" #include "flang/Semantics/attr.h" #include "flang/Semantics/expression.h" +#include "flang/Semantics/openmp-modifiers.h" #include "flang/Semantics/program-tree.h" #include "flang/Semantics/scope.h" #include "flang/Semantics/semantics.h" @@ -1642,27 +1643,27 @@ bool OmpVisitor::Pre(const parser::OpenMPDeclareMapperConstruct &x) { } bool OmpVisitor::Pre(const parser::OmpMapClause &x) { - const auto &mid{std::get(x.t)}; - if (const auto &mapperName{mid.v}) { - if (const auto symbol = FindSymbol(currScope(), *mapperName)) { + auto &mods{OmpGetModifiers(x)}; + if (auto *mapper{OmpGetUniqueModifier(mods)}) { + if (auto *symbol{FindSymbol(currScope(), mapper->v)}) { // TODO: Do we need a specific flag or type here, to distinghuish against // other ConstructName things? 
Leaving this for the full implementation // of mapper lowering. auto *misc{symbol->detailsIf()}; if (!misc || misc->kind() != MiscDetails::Kind::ConstructName) - context().Say(mapperName->source, - "Name '%s' should be a mapper name"_err_en_US, mapperName->source); + context().Say(mapper->v.source, + "Name '%s' should be a mapper name"_err_en_US, mapper->v.source); else - mapperName->symbol = symbol; + mapper->v.symbol = symbol; } else { - mapperName->symbol = &MakeSymbol( - *mapperName, MiscDetails{MiscDetails::Kind::ConstructName}); + mapper->v.symbol = + &MakeSymbol(mapper->v, MiscDetails{MiscDetails::Kind::ConstructName}); // TODO: When completing the implementation, we probably want to error if // the symbol is not declared, but right now, testing that the TODO for - // OmpMapclause happens is obscured by the TODO for declare mapper, so + // OmpMapClause happens is obscured by the TODO for declare mapper, so // leaving this out. Remove the above line once the declare mapper is - // implemented. context().Say(mapperName->source, "'%s' not - // declared"_err_en_US, mapperName->source); + // implemented. 
context().Say(mapper->v.source, "'%s' not + // declared"_err_en_US, mapper->v.source); } } return true; diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index ce87f3efdc363..3a88824826de3 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -18,6 +18,7 @@ add_flang_library(${CUFRT_LIBNAME} allocatable.cpp descriptor.cpp kernel.cpp + memmove-function.cpp memory.cpp registration.cpp ) diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp index 649ddb638abe6..9be54e8906903 100644 --- a/flang/runtime/CUDA/allocatable.cpp +++ b/flang/runtime/CUDA/allocatable.cpp @@ -7,10 +7,12 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/CUDA/allocatable.h" +#include "../assign-impl.h" #include "../stat.h" #include "../terminator.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/CUDA/memmove-function.h" #include "flang/Runtime/allocatable.h" #include "cuda_runtime.h" @@ -20,8 +22,27 @@ namespace Fortran::runtime::cuda { extern "C" { RT_EXT_API_GROUP_BEGIN -int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { +int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { + int stat{RTNAME(CUFAllocatableAllocate)( + desc, stream, hasStat, errMsg, sourceFile, sourceLine)}; +#ifndef RT_DEVICE_COMPILATION + // Descriptor synchronization is only done when the allocation is done + // from the host. 
+ if (stat == StatOk) { + void *deviceAddr{ + RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)}; + RTNAME(CUFDescriptorSync) + ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine); + } +#endif + return stat; +} + +int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { if (desc.HasAddendum()) { Terminator terminator{sourceFile, sourceLine}; // TODO: This require a bit more work to set the correct type descriptor @@ -32,16 +53,32 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat, // Perform the standard allocation. int stat{RTNAME(AllocatableAllocate)( desc, hasStat, errMsg, sourceFile, sourceLine)}; -#ifndef RT_DEVICE_COMPILATION - // Descriptor synchronization is only done when the allocation is done - // from the host. + return stat; +} + +int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, + const Descriptor &source, int64_t stream, bool hasStat, + const Descriptor *errMsg, const char *sourceFile, int sourceLine) { + int stat{RTNAME(CUFAllocatableAllocate)( + alloc, stream, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { - void *deviceAddr{ - RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)}; - RTNAME(CUFDescriptorSync) - ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine); + Terminator terminator{sourceFile, sourceLine}; + Fortran::runtime::DoFromSourceAssign( + alloc, source, terminator, &MemmoveHostToDevice); + } + return stat; +} + +int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, + const Descriptor &source, int64_t stream, bool hasStat, + const Descriptor *errMsg, const char *sourceFile, int sourceLine) { + int stat{RTNAME(CUFAllocatableAllocateSync)( + alloc, stream, hasStat, errMsg, sourceFile, sourceLine)}; + if (stat == StatOk) { + Terminator terminator{sourceFile, sourceLine}; + Fortran::runtime::DoFromSourceAssign( + alloc, source, terminator, 
&MemmoveHostToDevice); } -#endif return stat; } diff --git a/flang/runtime/CUDA/memmove-function.cpp b/flang/runtime/CUDA/memmove-function.cpp new file mode 100644 index 0000000000000..3ba9fa7e0f7f7 --- /dev/null +++ b/flang/runtime/CUDA/memmove-function.cpp @@ -0,0 +1,35 @@ +//===-- runtime/CUDA/memmove-function.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/memmove-function.h" +#include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +namespace Fortran::runtime::cuda { + +void *MemmoveHostToDevice(void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); + return dst; +} + +void *MemmoveDeviceToHost(void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); + return dst; +} + +void *MemmoveDeviceToDevice(void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. 
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice)); + return dst; +} + +} // namespace Fortran::runtime::cuda diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index 68963c4d7738a..0bbb493d2db91 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -11,31 +11,12 @@ #include "../terminator.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/CUDA/memmove-function.h" #include "flang/Runtime/assign.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { -static void *MemmoveHostToDevice( - void *dst, const void *src, std::size_t count) { - // TODO: Use cudaMemcpyAsync when we have support for stream. - CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); - return dst; -} - -static void *MemmoveDeviceToHost( - void *dst, const void *src, std::size_t count) { - // TODO: Use cudaMemcpyAsync when we have support for stream. - CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); - return dst; -} - -static void *MemmoveDeviceToDevice( - void *dst, const void *src, std::size_t count) { - // TODO: Use cudaMemcpyAsync when we have support for stream. - CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice)); - return dst; -} extern "C" { diff --git a/flang/test/Driver/print-supported-cpus.f90 b/flang/test/Driver/print-supported-cpus.f90 index 13688b193f9b9..60b725d4e1dcf 100644 --- a/flang/test/Driver/print-supported-cpus.f90 +++ b/flang/test/Driver/print-supported-cpus.f90 @@ -18,11 +18,11 @@ ! RUN: %flang --target=aarch64-unknown-linux-gnu --print-supported-cpus 2>&1 \ ! RUN: | FileCheck %s --check-prefixes=AARCH64,CHECK \ ! RUN: %} -! RUN: %if x86-registered-target %{ \ +! RUN: %if aarch64-registered-target %{ \ ! RUN: %flang --target=aarch64-unknown-linux-gnu -mcpu=help 2>&1 \ ! RUN: | FileCheck %s --check-prefixes=AARCH64,CHECK \ ! RUN: %} -! 
RUN: %if x86-registered-target %{ \ +! RUN: %if aarch64-registered-target %{ \ ! RUN: %flang --target=aarch64-unknown-linux-gnu -mtune=help 2>&1 \ ! RUN: | FileCheck %s --check-prefixes=AARCH64,CHECK \ ! RUN: %} diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index d68ff894d5af5..9b87c7546d1e9 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -19,7 +19,7 @@ func.func @_QPsub1() { // CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref>) -> !fir.ref>>> // CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -47,7 +47,7 @@ func.func @_QPsub3() { // CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, 
!fir.box, !fir.ref, i32) -> i32 // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -87,7 +87,7 @@ func.func @_QPsub5() { } // CHECK-LABEL: func.func @_QPsub5() -// CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -118,6 +118,67 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} { // CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMdataEb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: _FortranAAllocatableSetBounds // CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 + + +func.func @_QPallocate_source() { + %c0_i64 = arith.constant 0 : i64 + %c1_i32 = arith.constant 1 : i32 + %c0_i32 = arith.constant 0 : i32 + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFallocate_sourceEa"} + %4 = fir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa"} : (!fir.ref>>>) -> !fir.ref>>> + %5 = cuf.alloc !fir.box>> {bindc_name = "a_d", data_attr = #cuf.cuda, uniq_name = "_QFallocate_sourceEa_d"} -> !fir.ref>>> + %7 = fir.declare %5 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = 
"_QFallocate_sourceEa_d"} : (!fir.ref>>>) -> !fir.ref>>> + %8 = fir.load %4 : !fir.ref>>> + %22 = cuf.allocate %7 : !fir.ref>>> source(%8 : !fir.box>>) {data_attr = #cuf.cuda} -> i32 + return +} + +// CHECK-LABEL: func.func @_QPallocate_source() +// CHECK: %[[DECL_HOST:.*]] = fir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa"} : (!fir.ref>>>) -> !fir.ref>>> +// CHECK: %[[DECL_DEV:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa_d"} : (!fir.ref>>>) -> !fir.ref>>> +// CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref>>> +// CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box>>) -> !fir.box +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i64, i1, !fir.box, !fir.ref, i32) -> i32 + + +fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda} : !fir.box>> { + %c0 = arith.constant 0 : index + %0 = fir.zero_bits !fir.heap> + %1 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2> + %2 = fir.embox %0(%1) {allocator_idx = 2 : i32} : (!fir.heap>, !fir.shape<2>) -> !fir.box>> + fir.has_value %2 : !fir.box>> +} +func.func @_QMmod1Pallocate_source_global() { + %0 = fir.address_of(@_QMmod1Ea_d) : !fir.ref>>> + %1 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Ea_d"} : (!fir.ref>>>) -> !fir.ref>>> + %2 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"} + %6 = fir.declare %2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref>>>) -> !fir.ref>>> + %7 = fir.load %6 : !fir.ref>>> + %21 = cuf.allocate %1 : !fir.ref>>> source(%7 : !fir.box>>) {data_attr = #cuf.cuda} -> i32 + return +} + +// CHECK-LABEL: func.func 
@_QMmod1Pallocate_source_global() +// CHECK: fir.call @_FortranACUFAllocatableAllocateSourceSync + +func.func @_QQallocate_stream() { + %0 = cuf.alloc !fir.box>> {bindc_name = "a", data_attr = #cuf.cuda, uniq_name = "_QFEa"} -> !fir.ref>>> + %1 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFEa"} : (!fir.ref>>>) -> !fir.ref>>> + %2 = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"} + %3 = fir.declare %2 {uniq_name = "_QFEstream1"} : (!fir.ref) -> !fir.ref + %4 = fir.load %3 : !fir.ref + %5 = cuf.allocate %1 : !fir.ref>>> stream(%4 : i64) {data_attr = #cuf.cuda} -> i32 + return +} + +// CHECK-LABEL: func.func @_QQallocate_stream() +// CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"} +// CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref) -> !fir.ref +// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 } // end of module diff --git a/flang/test/Fir/struct-return-aarch64.fir b/flang/test/Fir/struct-return-aarch64.fir new file mode 100644 index 0000000000000..8b75c2cac7b6b --- /dev/null +++ b/flang/test/Fir/struct-return-aarch64.fir @@ -0,0 +1,229 @@ +// Test AArch64 ABI rewrite of struct returned by value (BIND(C), VALUE derived types). 
+// RUN: fir-opt --target-rewrite="target=aarch64-unknown-linux-gnu" %s | FileCheck %s + +!composite = !fir.type +// CHECK-LABEL: func.func private @test_composite() -> !fir.array<2xi64> +func.func private @test_composite() -> !composite +// CHECK-LABEL: func.func @test_call_composite( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) +func.func @test_call_composite(%arg0 : !fir.ref) { + // CHECK: %[[OUT:.*]] = fir.call @test_composite() : () -> !fir.array<2xi64> + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.array<2xi64> + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_composite() : () -> !composite + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + // CHECK: return + return +} + +!hfa_f16 = !fir.type +// CHECK-LABEL: func.func private @test_hfa_f16() -> !fir.type +func.func private @test_hfa_f16() -> !hfa_f16 +// CHECK-LABEL: func.func @test_call_hfa_f16( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) { +func.func @test_call_hfa_f16(%arg0 : !fir.ref) { + // CHECK: %[[OUT:.*]] = fir.call @test_hfa_f16() : () -> !fir.type + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_hfa_f16() : () -> !hfa_f16 + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + return +} + +!hfa_f32 = !fir.type +// CHECK-LABEL: func.func private @test_hfa_f32() -> !fir.type +func.func private @test_hfa_f32() -> !hfa_f32 +// CHECK-LABEL: 
func.func @test_call_hfa_f32( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) { +func.func @test_call_hfa_f32(%arg0 : !fir.ref) { + // CHECK: %[[OUT:.*]] = fir.call @test_hfa_f32() : () -> !fir.type + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_hfa_f32() : () -> !hfa_f32 + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + return +} + +!hfa_f64 = !fir.type +// CHECK-LABEL: func.func private @test_hfa_f64() -> !fir.type +func.func private @test_hfa_f64() -> !hfa_f64 +// CHECK-LABEL: func.func @test_call_hfa_f64( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) +func.func @test_call_hfa_f64(%arg0 : !fir.ref) { + // CHECK: %[[OUT:.*]] = fir.call @test_hfa_f64() : () -> !fir.type + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_hfa_f64() : () -> !hfa_f64 + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + return +} + +!hfa_f128 = !fir.type +// CHECK-LABEL: func.func private @test_hfa_f128() -> !fir.type +func.func private @test_hfa_f128() -> !hfa_f128 +// CHECK-LABEL: func.func @test_call_hfa_f128( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) { +func.func @test_call_hfa_f128(%arg0 : !fir.ref) { + // CHECK: %[[OUT:.*]] = fir.call @test_hfa_f128() : () -> !fir.type + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type 
+ // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_hfa_f128() : () -> !hfa_f128 + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + return +} + +!hfa_bf16 = !fir.type +// CHECK-LABEL: func.func private @test_hfa_bf16() -> !fir.type +func.func private @test_hfa_bf16() -> !hfa_bf16 +// CHECK-LABEL: func.func @test_call_hfa_bf16( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) { +func.func @test_call_hfa_bf16(%arg0 : !fir.ref) { + // CHECK: %[[OUT:.*]] = fir.call @test_hfa_bf16() : () -> !fir.type + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_hfa_bf16() : () -> !hfa_bf16 + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + return +} + +!too_big = !fir.type +// CHECK-LABEL: func.func private @test_too_big(!fir.ref> +// CHECK-SAME: {llvm.align = 8 : i32, llvm.sret = !fir.type}) +func.func private @test_too_big() -> !too_big +// CHECK-LABEL: func.func @test_call_too_big( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref>) { +func.func @test_call_too_big(%arg0 : !fir.ref) { + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARG:.*]] = fir.alloca !fir.type + // CHECK: fir.call @test_too_big(%[[ARG]]) : (!fir.ref>) -> () + // CHECK: %[[CVT:.*]] = fir.convert %[[ARG]] : (!fir.ref>) -> !fir.ref> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_too_big() : () -> !too_big 
+ // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref> + fir.store %out to %arg0 : !fir.ref + return +} + + +!too_big_hfa = !fir.type}> +// CHECK-LABEL: func.func private @test_too_big_hfa(!fir.ref}>> +// CHECK-SAME: {llvm.align = 8 : i32, llvm.sret = !fir.type}>}) +func.func private @test_too_big_hfa() -> !too_big_hfa +// CHECK-LABEL: func.func @test_call_too_big_hfa( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref}>>) { +func.func @test_call_too_big_hfa(%arg0 : !fir.ref) { + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARG:.*]] = fir.alloca !fir.type}> + // CHECK: fir.call @test_too_big_hfa(%[[ARG]]) : (!fir.ref}>>) -> () + // CHECK: %[[CVT:.*]] = fir.convert %[[ARG]] : (!fir.ref}>>) -> !fir.ref}>> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref}>> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + %out = fir.call @test_too_big_hfa() : () -> !too_big_hfa + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref}>> + fir.store %out to %arg0 : !fir.ref + return +} + +!nested_hfa_first = !fir.type +// CHECK-LABEL: func.func private @test_nested_hfa_first() -> !fir.type,c:f16}> +func.func private @test_nested_hfa_first() -> !nested_hfa_first +// CHECK-LABEL: func.func @test_call_nested_hfa_first(%arg0: !fir.ref,c:f16}>>) { +func.func @test_call_nested_hfa_first(%arg0 : !fir.ref) { + %out = fir.call @test_nested_hfa_first() : () -> !nested_hfa_first + // CHECK: %[[OUT:.*]] = fir.call @test_nested_hfa_first() : () -> !fir.type,c:f16}> + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type,c:f16}> + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref,c:f16}>> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref,c:f16}>> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref,c:f16}>> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + fir.store %out to %arg0 : !fir.ref + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref,c:f16}>> + return +} + + +!nested_hfa_middle = !fir.type 
+// CHECK-LABEL: func.func private @test_nested_hfa_middle() -> !fir.type,c:f16}> +func.func private @test_nested_hfa_middle() -> !nested_hfa_middle +// CHECK-LABEL: func.func @test_call_nested_hfa_middle(%arg0: !fir.ref,c:f16}>>) { +func.func @test_call_nested_hfa_middle(%arg0 : !fir.ref) { + %out = fir.call @test_nested_hfa_middle() : () -> !nested_hfa_middle + // CHECK: %[[OUT:.*]] = fir.call @test_nested_hfa_middle() : () -> !fir.type,c:f16}> + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type,c:f16}> + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref,c:f16}>> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref,c:f16}>> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref,c:f16}>> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + fir.store %out to %arg0 : !fir.ref + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref,c:f16}>> + return +} + +!nested_hfa_end = !fir.type +// CHECK-LABEL: func.func private @test_nested_hfa_end() -> !fir.type}> +func.func private @test_nested_hfa_end() -> !nested_hfa_end +// CHECK-LABEL: func.func @test_call_nested_hfa_end(%arg0: !fir.ref}>>) { +func.func @test_call_nested_hfa_end(%arg0 : !fir.ref) { + %out = fir.call @test_nested_hfa_end() : () -> !nested_hfa_end + // CHECK: %[[OUT:.*]] = fir.call @test_nested_hfa_end() : () -> !fir.type}> + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type}> + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref}>> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref}>> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref}>> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + fir.store %out to %arg0 : !fir.ref + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref}>> + return +} + +!nested_hfa_array = !fir.type,b:f32}> +// CHECK-LABEL: func.func private @test_nested_hfa_array() -> !fir.type,b:f32}> +func.func private @test_nested_hfa_array() -> 
!nested_hfa_array +// CHECK-LABEL: func.func @test_call_nested_hfa_array(%arg0: !fir.ref,b:f32}> +func.func @test_call_nested_hfa_array(%arg0 : !fir.ref) { + %out = fir.call @test_nested_hfa_array() : () -> !nested_hfa_array + // CHECK: %[[OUT:.*]] = fir.call @test_nested_hfa_array() : () -> !fir.type,b:f32}> + // CHECK: %[[STACK:.*]] = llvm.intr.stacksave : !llvm.ptr + // CHECK: %[[ARR:.*]] = fir.alloca !fir.type,b:f32}> + // CHECK: fir.store %[[OUT]] to %[[ARR]] : !fir.ref,b:f32}> + // CHECK: %[[CVT:.*]] = fir.convert %[[ARR]] : (!fir.ref,b:f32}> + // CHECK: %[[LD:.*]] = fir.load %[[CVT]] : !fir.ref,b:f32}> + // CHECK: llvm.intr.stackrestore %[[STACK]] : !llvm.ptr + fir.store %out to %arg0 : !fir.ref + // CHECK: fir.store %[[LD]] to %[[ARG0]] : !fir.ref,b:f32}> + return +} diff --git a/flang/test/Lower/OpenMP/Todo/flush-seq-cst.f90 b/flang/test/Lower/OpenMP/Todo/flush-seq-cst.f90 new file mode 100644 index 0000000000000..753e1cfcd7aa5 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/flush-seq-cst.f90 @@ -0,0 +1,6 @@ +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s + +! 
CHECK: not yet implemented: Unhandled clause SEQ_CST in FLUSH construct +program flush_seq_cst + !$omp flush seq_cst +end program \ No newline at end of file diff --git a/flang/test/Lower/OpenMP/Todo/map-mapper.f90 b/flang/test/Lower/OpenMP/Todo/map-mapper.f90 index d83c20db29307..9554ffd5fda7b 100644 --- a/flang/test/Lower/OpenMP/Todo/map-mapper.f90 +++ b/flang/test/Lower/OpenMP/Todo/map-mapper.f90 @@ -8,7 +8,7 @@ program p !!end type t1 !!!$omp declare mapper(xx : t1 :: nn) map(nn, nn%x) !$omp target map(mapper(xx), from:a) -!CHECK: not yet implemented: OmpMapClause(MAPPER(...)) +!CHECK: not yet implemented: Support for mapper modifiers is not implemented yet do i=1,n a(i) = 4.2 end do diff --git a/flang/test/Parser/OpenMP/from-clause.f90 b/flang/test/Parser/OpenMP/from-clause.f90 index cff9c077c0a94..acd5843ff0c4a 100644 --- a/flang/test/Parser/OpenMP/from-clause.f90 +++ b/flang/test/Parser/OpenMP/from-clause.f90 @@ -28,7 +28,7 @@ subroutine f01(x) !PARSE-TREE: OmpSimpleStandaloneDirective -> llvm::omp::Directive = target update !PARSE-TREE: OmpClauseList -> OmpClause -> From -> OmpFromClause -!PARSE-TREE: | Expectation = Present +!PARSE-TREE: | Modifier -> OmpExpectation -> Value = Present !PARSE-TREE: | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | bool = 'true' @@ -44,8 +44,8 @@ subroutine f02(x) !PARSE-TREE: OmpSimpleStandaloneDirective -> llvm::omp::Directive = target update !PARSE-TREE: OmpClauseList -> OmpClause -> From -> OmpFromClause -!PARSE-TREE: | Expectation = Present -!PARSE-TREE: | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | Modifier -> OmpExpectation -> Value = Present +!PARSE-TREE: | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | TypeDeclarationStmt !PARSE-TREE: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | EntityDecl @@ -73,8 +73,8 @@ subroutine f03(x) !PARSE-TREE: OmpSimpleStandaloneDirective -> llvm::omp::Directive = target update 
!PARSE-TREE: OmpClauseList -> OmpClause -> From -> OmpFromClause -!PARSE-TREE: | Expectation = Present -!PARSE-TREE: | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | Modifier -> OmpExpectation -> Value = Present +!PARSE-TREE: | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | TypeDeclarationStmt !PARSE-TREE: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | EntityDecl diff --git a/flang/test/Parser/OpenMP/map-modifiers.f90 b/flang/test/Parser/OpenMP/map-modifiers.f90 index 578512283c4dc..4e034e51352e4 100644 --- a/flang/test/Parser/OpenMP/map-modifiers.f90 +++ b/flang/test/Parser/OpenMP/map-modifiers.f90 @@ -18,11 +18,11 @@ subroutine f00(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Ompx_Hold -!PARSE-TREE: | | TypeModifier = Always -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | TypeModifier = Close -!PARSE-TREE: | | Type = To +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Always +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Close +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = To !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | bool = 'true' @@ -43,10 +43,10 @@ subroutine f01(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Ompx_Hold -!PARSE-TREE: | | TypeModifier = Always -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | TypeModifier = Close +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> 
Value = Always +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Close !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | bool = 'true' @@ -67,7 +67,7 @@ subroutine f02(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | Type = From +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = From !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | bool = 'true' @@ -108,11 +108,11 @@ subroutine f04(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Ompx_Hold -!PARSE-TREE: | | TypeModifier = Always -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | TypeModifier = Close -!PARSE-TREE: | | Type = To +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Always +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Close +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = To !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | bool = 'false' @@ -133,10 +133,10 @@ subroutine f05(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Ompx_Hold -!PARSE-TREE: | | TypeModifier = Always -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | TypeModifier = Close +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> 
Value = Always +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Close !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | bool = 'true' @@ -158,8 +158,8 @@ subroutine f10(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | | TypeDeclarationStmt !PARSE-TREE: | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | | EntityDecl @@ -169,7 +169,7 @@ subroutine f10(x) !PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '1' !PARSE-TREE: | | | | Scalar -> Integer -> Expr = '10_4' !PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '10' -!PARSE-TREE: | | Type = To +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = To !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> ArrayElement !PARSE-TREE: | | | DataRef -> Name = 'x' !PARSE-TREE: | | | SectionSubscript -> Integer -> Expr = 'i' @@ -193,8 +193,8 @@ subroutine f11(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | | TypeDeclarationStmt !PARSE-TREE: | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | | EntityDecl @@ -204,7 +204,7 @@ subroutine f11(x) !PARSE-TREE: | | | | 
| LiteralConstant -> IntLiteralConstant = '1' !PARSE-TREE: | | | | Scalar -> Integer -> Expr = '10_4' !PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '10' -!PARSE-TREE: | | Type = To +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = To !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> ArrayElement !PARSE-TREE: | | | DataRef -> Name = 'x' !PARSE-TREE: | | | SectionSubscript -> Integer -> Expr = 'i' @@ -228,8 +228,8 @@ subroutine f12(x) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | | TypeDeclarationStmt !PARSE-TREE: | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | | EntityDecl @@ -239,17 +239,17 @@ subroutine f12(x) !PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '1' !PARSE-TREE: | | | | Scalar -> Integer -> Expr = '10_4' !PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '10' -!PARSE-TREE: | | OmpIteratorSpecifier -!PARSE-TREE: | | | TypeDeclarationStmt -!PARSE-TREE: | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> -!PARSE-TREE: | | | | EntityDecl -!PARSE-TREE: | | | | | Name = 'j' -!PARSE-TREE: | | | SubscriptTriplet -!PARSE-TREE: | | | | Scalar -> Integer -> Expr = '1_4' -!PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: | | | | Scalar -> Integer -> Expr = '10_4' -!PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '10' -!PARSE-TREE: | | Type = To +!PARSE-TREE: | | | OmpIteratorSpecifier +!PARSE-TREE: | | | | TypeDeclarationStmt +!PARSE-TREE: | | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> +!PARSE-TREE: | | | | 
| EntityDecl +!PARSE-TREE: | | | | | | Name = 'j' +!PARSE-TREE: | | | | SubscriptTriplet +!PARSE-TREE: | | | | | Scalar -> Integer -> Expr = '1_4' +!PARSE-TREE: | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: | | | | | Scalar -> Integer -> Expr = '10_4' +!PARSE-TREE: | | | | | | LiteralConstant -> IntLiteralConstant = '10' +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = To !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> ArrayElement !PARSE-TREE: | | | DataRef -> Name = 'x' !PARSE-TREE: | | | SectionSubscript -> Integer -> Expr = '(i+j)/2_4' @@ -265,7 +265,7 @@ subroutine f12(x) !PARSE-TREE: | | | | | | LiteralConstant -> IntLiteralConstant = '2' !PARSE-TREE: | | bool = 'true' -subroutine f90(x, y) +subroutine f20(x, y) integer :: x(10) integer :: y integer, parameter :: p = 23 @@ -274,7 +274,7 @@ subroutine f90(x, y) !$omp end target end -!UNPARSE: SUBROUTINE f90 (x, y) +!UNPARSE: SUBROUTINE f20 (x, y) !UNPARSE: INTEGER x(10_4) !UNPARSE: INTEGER y !UNPARSE: INTEGER, PARAMETER :: p = 23_4 @@ -286,8 +286,8 @@ subroutine f90(x, y) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | TypeModifier = Present -!PARSE-TREE: | | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | | TypeDeclarationStmt !PARSE-TREE: | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | | EntityDecl @@ -299,24 +299,24 @@ subroutine f90(x, y) !PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'y' !PARSE-TREE: | | | | Scalar -> Integer -> Expr = '23_4' !PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'p' -!PARSE-TREE: | | OmpIteratorSpecifier -!PARSE-TREE: | | | TypeDeclarationStmt -!PARSE-TREE: | | | | DeclarationTypeSpec -> 
IntrinsicTypeSpec -> IntegerTypeSpec -> -!PARSE-TREE: | | | | EntityDecl -!PARSE-TREE: | | | | | Name = 'k' -!PARSE-TREE: | | | SubscriptTriplet -!PARSE-TREE: | | | | Scalar -> Integer -> Expr = 'i' -!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'i' -!PARSE-TREE: | | | | Scalar -> Integer -> Expr = 'j' -!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'j' -!PARSE-TREE: | | Type = To +!PARSE-TREE: | | | OmpIteratorSpecifier +!PARSE-TREE: | | | | TypeDeclarationStmt +!PARSE-TREE: | | | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> +!PARSE-TREE: | | | | | EntityDecl +!PARSE-TREE: | | | | | | Name = 'k' +!PARSE-TREE: | | | | SubscriptTriplet +!PARSE-TREE: | | | | | Scalar -> Integer -> Expr = 'i' +!PARSE-TREE: | | | | | | Designator -> DataRef -> Name = 'i' +!PARSE-TREE: | | | | | Scalar -> Integer -> Expr = 'j' +!PARSE-TREE: | | | | | | Designator -> DataRef -> Name = 'j' +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = To !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> ArrayElement !PARSE-TREE: | | | DataRef -> Name = 'x' !PARSE-TREE: | | | SectionSubscript -> Integer -> Expr = 'k' !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'k' !PARSE-TREE: | | bool = 'true' -subroutine f100(x, y) +subroutine f21(x, y) integer :: x(10) integer :: y integer, parameter :: p = 23 @@ -325,7 +325,7 @@ subroutine f100(x, y) !$omp end target end -!UNPARSE: SUBROUTINE f100 (x, y) +!UNPARSE: SUBROUTINE f21 (x, y) !UNPARSE: INTEGER x(10_4) !UNPARSE: INTEGER y !UNPARSE: INTEGER, PARAMETER :: p = 23_4 @@ -337,7 +337,42 @@ subroutine f100(x, y) !PARSE-TREE: OmpBeginBlockDirective !PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause -!PARSE-TREE: | | OmpMapperIdentifier -> Name = 'xx' -!PARSE-TREE: | | Type = From +!PARSE-TREE: | | Modifier -> OmpMapper -> Name = 'xx' +!PARSE-TREE: | | Modifier -> OmpMapType -> Value = From !PARSE-TREE: | | 
OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +subroutine f22(x) + integer :: x(10) + !$omp target map(present, iterator(i = 1:10), always, from: x(i)) + x = x + 1 + !$omp end target +end + +!UNPARSE: SUBROUTINE f22 (x) +!UNPARSE: INTEGER x(10_4) +!UNPARSE: !$OMP TARGET MAP(PRESENT, ITERATOR(INTEGER i = 1_4:10_4), ALWAYS, FROM: x(i)) +!UNPARSE: x=x+1_4 +!UNPARSE: !$OMP END TARGET +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpClauseList -> OmpClause -> Map -> OmpMapClause +!PARSE-TREE: | Modifier -> OmpMapTypeModifier -> Value = Present +!PARSE-TREE: | Modifier -> OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | | TypeDeclarationStmt +!PARSE-TREE: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> +!PARSE-TREE: | | | EntityDecl +!PARSE-TREE: | | | | Name = 'i' +!PARSE-TREE: | | SubscriptTriplet +!PARSE-TREE: | | | Scalar -> Integer -> Expr = '1_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: | | | Scalar -> Integer -> Expr = '10_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '10' +!PARSE-TREE: | Modifier -> OmpMapTypeModifier -> Value = Always +!PARSE-TREE: | Modifier -> OmpMapType -> Value = From +!PARSE-TREE: | OmpObjectList -> OmpObject -> Designator -> DataRef -> ArrayElement +!PARSE-TREE: | | DataRef -> Name = 'x' +!PARSE-TREE: | | SectionSubscript -> Integer -> Expr = 'i' +!PARSE-TREE: | | | Designator -> DataRef -> Name = 'i' +!PARSE-TREE: | bool = 'true' + diff --git a/flang/test/Parser/OpenMP/target-update-to-clause.f90 b/flang/test/Parser/OpenMP/target-update-to-clause.f90 index bb57270fc0bf9..03006ba37334f 100644 --- a/flang/test/Parser/OpenMP/target-update-to-clause.f90 +++ b/flang/test/Parser/OpenMP/target-update-to-clause.f90 @@ -28,7 +28,7 @@ subroutine f01(x) !PARSE-TREE: OmpSimpleStandaloneDirective -> llvm::omp::Directive = target update !PARSE-TREE: OmpClauseList -> OmpClause -> To -> 
OmpToClause -!PARSE-TREE: | Expectation = Present +!PARSE-TREE: | Modifier -> OmpExpectation -> Value = Present !PARSE-TREE: | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | bool = 'true' @@ -44,8 +44,8 @@ subroutine f02(x) !PARSE-TREE: OmpSimpleStandaloneDirective -> llvm::omp::Directive = target update !PARSE-TREE: OmpClauseList -> OmpClause -> To -> OmpToClause -!PARSE-TREE: | Expectation = Present -!PARSE-TREE: | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | Modifier -> OmpExpectation -> Value = Present +!PARSE-TREE: | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | TypeDeclarationStmt !PARSE-TREE: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | EntityDecl @@ -73,8 +73,8 @@ subroutine f03(x) !PARSE-TREE: OmpSimpleStandaloneDirective -> llvm::omp::Directive = target update !PARSE-TREE: OmpClauseList -> OmpClause -> To -> OmpToClause -!PARSE-TREE: | Expectation = Present -!PARSE-TREE: | OmpIterator -> OmpIteratorSpecifier +!PARSE-TREE: | Modifier -> OmpExpectation -> Value = Present +!PARSE-TREE: | Modifier -> OmpIterator -> OmpIteratorSpecifier !PARSE-TREE: | | TypeDeclarationStmt !PARSE-TREE: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> !PARSE-TREE: | | | EntityDecl diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 406d30b38948e..8dd6d10200cd3 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=50 +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=51 use omp_lib ! Check OpenMP clause validity for the following directives: ! 
@@ -507,7 +507,6 @@ !$omp flush acquire !ERROR: If memory-order-clause is RELEASE, ACQUIRE, or ACQ_REL, list items must not be specified on the FLUSH directive !$omp flush release (c) - !ERROR: SEQ_CST clause is not allowed on the FLUSH directive !$omp flush seq_cst !ERROR: RELAXED clause is not allowed on the FLUSH directive !$omp flush relaxed diff --git a/flang/test/Semantics/OpenMP/combined-constructs.f90 b/flang/test/Semantics/OpenMP/combined-constructs.f90 index 25893a47860f4..4f2a4a4f501b9 100644 --- a/flang/test/Semantics/OpenMP/combined-constructs.f90 +++ b/flang/test/Semantics/OpenMP/combined-constructs.f90 @@ -33,7 +33,7 @@ program main enddo !$omp end target parallel - !ERROR: A variable-category modifier is required + !ERROR: 'variable-category' modifier is required !$omp target parallel defaultmap(tofrom) do i = 1, N a(i) = 3.14 @@ -80,7 +80,7 @@ program main enddo !$omp end target parallel do - !ERROR: A variable-category modifier is required + !ERROR: 'variable-category' modifier is required !$omp target parallel do defaultmap(tofrom) do i = 1, N a(i) = 3.14 @@ -140,7 +140,7 @@ program main enddo !$omp end target teams - !ERROR: A variable-category modifier is required + !ERROR: 'variable-category' modifier is required !$omp target teams defaultmap(tofrom) do i = 1, N a(i) = 3.14 @@ -240,7 +240,7 @@ program main enddo !$omp end target teams distribute - !ERROR: A variable-category modifier is required + !ERROR: 'variable-category' modifier is required !$omp target teams distribute defaultmap(tofrom) do i = 1, N a(i) = 3.14 @@ -333,7 +333,7 @@ program main enddo !$omp end target teams distribute parallel do - !ERROR: A variable-category modifier is required + !ERROR: 'variable-category' modifier is required !$omp target teams distribute parallel do defaultmap(tofrom) do i = 1, N a(i) = 3.14 @@ -433,7 +433,7 @@ program main enddo !$omp end target teams distribute parallel do simd - !ERROR: A variable-category modifier is required + !ERROR: 
'variable-category' modifier is required !$omp target teams distribute parallel do simd defaultmap(tofrom) do i = 1, N a(i) = 3.14 diff --git a/flang/test/Semantics/OpenMP/defaultmap-clause-v45.f90 b/flang/test/Semantics/OpenMP/defaultmap-clause-v45.f90 index 9cb91a71c5535..904fc306a31f4 100644 --- a/flang/test/Semantics/OpenMP/defaultmap-clause-v45.f90 +++ b/flang/test/Semantics/OpenMP/defaultmap-clause-v45.f90 @@ -1,7 +1,7 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45 -Werror subroutine f00 -!WARNING: A variable-category modifier is required +!WARNING: 'variable-category' modifier is required !$omp target defaultmap(tofrom) !$omp end target end diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90 index ed0cf6602d574..615332c6cf31c 100644 --- a/flang/test/Semantics/OpenMP/flush02.f90 +++ b/flang/test/Semantics/OpenMP/flush02.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=50 +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=51 ! Check OpenMP 5.0 - 2.17.8 flush Construct ! Restriction - @@ -27,7 +27,6 @@ !Only memory-order-clauses. if (omp_get_thread_num() == 1) THEN ! Not allowed clauses. - !ERROR: SEQ_CST clause is not allowed on the FLUSH directive !$omp flush seq_cst !ERROR: RELAXED clause is not allowed on the FLUSH directive !$omp flush relaxed @@ -41,7 +40,6 @@ !$omp flush acquire acquire ! Mix of allowed and not allowed. 
- !ERROR: SEQ_CST clause is not allowed on the FLUSH directive !$omp flush seq_cst acquire END IF diff --git a/flang/test/Semantics/OpenMP/from-clause-v45.f90 b/flang/test/Semantics/OpenMP/from-clause-v45.f90 index 9c418a400e548..98dff295c879d 100644 --- a/flang/test/Semantics/OpenMP/from-clause-v45.f90 +++ b/flang/test/Semantics/OpenMP/from-clause-v45.f90 @@ -8,21 +8,22 @@ subroutine f00(x) subroutine f01(x) integer :: x(10) -!WARNING: Iterator modifiers are not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'iterator' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 !$omp target update from(iterator(i = 1:5): x(i)) end subroutine f02(x) integer :: x(10) -!WARNING: The PRESENT modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 -!WARNING: Iterator modifiers are not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'expectation' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'iterator' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 !$omp target update from(present, iterator(i = 1:5): x(i)) end subroutine f03(x) integer :: x(10) -!WARNING: The PRESENT modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 -!ERROR: Only one PRESENT modifier is allowed +!WARNING: 'expectation' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'expectation' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 +!ERROR: 'expectation' modifier cannot occur multiple times !$omp target update from(present, present: x) end diff --git a/flang/test/Semantics/OpenMP/from-clause-v51.f90 b/flang/test/Semantics/OpenMP/from-clause-v51.f90 index 18139f04c35cf..70c00823d073e 100644 --- a/flang/test/Semantics/OpenMP/from-clause-v51.f90 +++ b/flang/test/Semantics/OpenMP/from-clause-v51.f90 @@ -2,13 +2,13 @@ subroutine f01(x) integer :: x(10) -!ERROR: Only one iterator-modifier is allowed +!ERROR: 'iterator' modifier cannot occur multiple times !$omp 
target update from(iterator(i = 1:5), iterator(j = 1:5): x(i + j)) end subroutine f03(x) integer :: x(10) -!ERROR: Only one PRESENT modifier is allowed +!ERROR: 'expectation' modifier cannot occur multiple times !$omp target update from(present, present: x) end diff --git a/flang/test/Semantics/OpenMP/map-clause.f90 b/flang/test/Semantics/OpenMP/map-clause.f90 index efcef2571a04a..65ecbd9456464 100644 --- a/flang/test/Semantics/OpenMP/map-clause.f90 +++ b/flang/test/Semantics/OpenMP/map-clause.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 ! Check OpenMP MAP clause validity. Section 5.8.3 OpenMP 5.2. subroutine sb(arr) diff --git a/flang/test/Semantics/OpenMP/map-modifiers.f90 b/flang/test/Semantics/OpenMP/map-modifiers.f90 index f863185d111e0..aae918a2f1f94 100644 --- a/flang/test/Semantics/OpenMP/map-modifiers.f90 +++ b/flang/test/Semantics/OpenMP/map-modifiers.f90 @@ -83,8 +83,16 @@ subroutine f19(x) subroutine f1a(x) integer :: x(10) -!ERROR: Only one iterator-modifier is allowed +!ERROR: 'iterator' modifier cannot occur multiple times !$omp target map(present, iterator(i = 1:2), iterator(j = 1:2), to: x(i + j)) x = x + 1 !$omp end target end + +subroutine f23(x) + integer :: x(10) +!ERROR: 'map-type' should be the last modifier + !$omp target map(present, from, iterator(i = 1:10): x(i)) + x = x + 1 + !$omp end target +end diff --git a/flang/test/Semantics/OpenMP/to-clause-v45.f90 b/flang/test/Semantics/OpenMP/to-clause-v45.f90 index 39e842492ef08..e4d8967ca14df 100644 --- a/flang/test/Semantics/OpenMP/to-clause-v45.f90 +++ b/flang/test/Semantics/OpenMP/to-clause-v45.f90 @@ -8,21 +8,22 @@ subroutine f00(x) subroutine f01(x) integer :: x(10) -!WARNING: Iterator modifiers are not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'iterator' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 !$omp target update to(iterator(i = 1:5): 
x(i)) end subroutine f02(x) integer :: x(10) -!WARNING: The PRESENT modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 -!WARNING: Iterator modifiers are not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'expectation' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'iterator' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 !$omp target update to(present, iterator(i = 1:5): x(i)) end subroutine f03(x) integer :: x(10) -!WARNING: The PRESENT modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 -!ERROR: Only one PRESENT modifier is allowed +!WARNING: 'expectation' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 +!WARNING: 'expectation' modifier is not supported in OpenMP v4.5, try -fopenmp-version=51 +!ERROR: 'expectation' modifier cannot occur multiple times !$omp target update to(present, present: x) end diff --git a/flang/test/Semantics/OpenMP/to-clause-v51.f90 b/flang/test/Semantics/OpenMP/to-clause-v51.f90 index d4f5f15efeb97..8abbca3bb07cd 100644 --- a/flang/test/Semantics/OpenMP/to-clause-v51.f90 +++ b/flang/test/Semantics/OpenMP/to-clause-v51.f90 @@ -2,13 +2,13 @@ subroutine f01(x) integer :: x(10) -!ERROR: Only one iterator-modifier is allowed +!ERROR: 'iterator' modifier cannot occur multiple times !$omp target update to(iterator(i = 1:5), iterator(j = 1:5): x(i + j)) end subroutine f03(x) integer :: x(10) -!ERROR: Only one PRESENT modifier is allowed +!ERROR: 'expectation' modifier cannot occur multiple times !$omp target update to(present, present: x) end diff --git a/flang/test/Transforms/OpenMP/lower-workshare-nested.mlir b/flang/test/Transforms/OpenMP/lower-workshare-nested.mlir new file mode 100644 index 0000000000000..bfd65f04d94b1 --- /dev/null +++ b/flang/test/Transforms/OpenMP/lower-workshare-nested.mlir @@ -0,0 +1,22 @@ +// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s | FileCheck %s + +// Checks that the nested loop_wrapper 
gets parallelized +func.func @wsfunc(%cond : i1) { + omp.workshare { + %c1 = arith.constant 1 : index + %c42 = arith.constant 42 : index + fir.if %cond { + omp.workshare.loop_wrapper { + omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) { + "test.test1"() : () -> () + omp.yield + } + } + } + omp.terminator + } + return +} + +// CHECK: fir.if +// CHECK: omp.wsloop nowait diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 77b659b2ef232..eee5b63bab513 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -412,6 +412,12 @@ foreach(entrypoint IN LISTS TARGET_LLVMLIBC_ENTRYPOINTS) list(APPEND TARGET_ENTRYPOINT_NAME_LIST ${entrypoint_name}) endforeach() +if(MSVC AND NOT MSYS) + set(libc_opt_high_flag "/O2") +else() + set(libc_opt_high_flag "-O3") +endif() + add_subdirectory(include) add_subdirectory(config) add_subdirectory(hdr) diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index e20591b80e6f2..937bd22451c5f 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -72,11 +72,6 @@ else() endif() set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -# The NVPTX backend cannot currently handle objects created in debug mode. -if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX AND CMAKE_BUILD_TYPE STREQUAL "Debug") - set(LIBC_GPU_TESTS_DISABLED TRUE) -endif() - # Identify the GPU loader utility used to run tests. 
set(LIBC_GPU_LOADER_EXECUTABLE "" CACHE STRING "Executable for the GPU loader.") if(LIBC_GPU_LOADER_EXECUTABLE) diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 91611026df105..899a93ad72d4c 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -767,7 +767,6 @@ if(LIBC_TARGET_OS_IS_GPU) gpu/rpc.h DEPENDS .llvm_libc_common_h - .llvm-libc-types.rpc_opcodes_t ) endif() diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 81bb0d6e6f50e..ee734eafce362 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -122,7 +122,6 @@ add_header(struct_sockaddr_un HDR struct_sockaddr_un.h DEPENDS .sa_family_t) add_header(struct_sockaddr HDR struct_sockaddr.h DEPENDS .sa_family_t) add_header(struct_iovec HDR struct_iovec.h DEPENDS .size_t) add_header(struct_msghdr HDR struct_msghdr.h DEPENDS .size_t .socklen_t .struct_iovec) -add_header(rpc_opcodes_t HDR rpc_opcodes_t.h) add_header(ACTION HDR ACTION.h) add_header(ENTRY HDR ENTRY.h) add_header(struct_hsearch_data HDR struct_hsearch_data.h) diff --git a/libc/shared/rpc.h b/libc/shared/rpc.h index 489a8cebfb807..3f586744377d9 100644 --- a/libc/shared/rpc.h +++ b/libc/shared/rpc.h @@ -42,6 +42,13 @@ namespace rpc { #define __scoped_atomic_thread_fence(ord, scp) __atomic_thread_fence(ord) #endif +/// Generic codes that can be used when implementing the server. +enum Status { + SUCCESS = 0x0, + ERROR = 0x1000, + UNHANDLED_OPCODE = 0x1001, +}; + /// A fixed size channel used to communicate between the RPC client and server. 
struct Buffer { uint64_t data[8]; @@ -78,11 +85,11 @@ template struct Process { RPC_INLINE Process &operator=(Process &&) = default; RPC_INLINE ~Process() = default; - uint32_t port_count = 0; - uint32_t *inbox = nullptr; - uint32_t *outbox = nullptr; - Header *header = nullptr; - Buffer *packet = nullptr; + const uint32_t port_count = 0; + const uint32_t *const inbox = nullptr; + uint32_t *const outbox = nullptr; + Header *const header = nullptr; + Buffer *const packet = nullptr; static constexpr uint64_t NUM_BITS_IN_WORD = sizeof(uint32_t) * 8; uint32_t lock[MAX_PORT_COUNT / NUM_BITS_IN_WORD] = {0}; diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/shared/rpc_opcodes.h similarity index 93% rename from libc/include/llvm-libc-types/rpc_opcodes_t.h rename to libc/shared/rpc_opcodes.h index f3b35518935a5..430b53aa1870c 100644 --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/shared/rpc_opcodes.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_TYPES_RPC_OPCODES_T_H -#define LLVM_LIBC_TYPES_RPC_OPCODES_T_H +#ifndef LLVM_LIBC_SHARED_RPC_OPCODES_H +#define LLVM_LIBC_SHARED_RPC_OPCODES_H #define LLVM_LIBC_RPC_BASE 'c' #define LLVM_LIBC_OPCODE(n) (LLVM_LIBC_RPC_BASE << 24 | n) @@ -46,4 +46,4 @@ typedef enum { RPC_LAST = 0xFFFFFFFF, } rpc_opcode_t; -#endif // LLVM_LIBC_TYPES_RPC_OPCODES_T_H +#endif // LLVM_LIBC_SHARED_RPC_OPCODES_H diff --git a/libc/shared/rpc_util.h b/libc/shared/rpc_util.h index 502014d839ae9..bb0177c01b85e 100644 --- a/libc/shared/rpc_util.h +++ b/libc/shared/rpc_util.h @@ -17,6 +17,11 @@ #define RPC_TARGET_IS_GPU #endif +// Workaround for missing __has_builtin in < GCC 10. +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + #ifndef RPC_INLINE #define RPC_INLINE inline #endif @@ -141,17 +146,15 @@ template class optional { /// Suspend the thread briefly to assist the thread scheduler during busy loops. 
RPC_INLINE void sleep_briefly() { -#if defined(LIBC_TARGET_ARCH_IS_NVPTX) +#if defined(__NVPTX__) if (__nvvm_reflect("__CUDA_ARCH") >= 700) asm("nanosleep.u32 64;" ::: "memory"); -#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) +#elif defined(__AMDGPU__) __builtin_amdgcn_s_sleep(2); -#elif defined(LIBC_TARGET_ARCH_IS_X86) +#elif __has_builtin(__builtin_ia32_pause) __builtin_ia32_pause(); -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && __has_builtin(__builtin_arm_isb) +#elif __has_builtin(__builtin_arm_isb) __builtin_arm_isb(0xf); -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) - asm volatile("isb\n" ::: "memory"); #else // Simply do nothing if sleeping isn't supported on this platform. #endif diff --git a/libc/src/__support/RPC/rpc_client.h b/libc/src/__support/RPC/rpc_client.h index 8923e62e0e22a..7a62e4e983ad0 100644 --- a/libc/src/__support/RPC/rpc_client.h +++ b/libc/src/__support/RPC/rpc_client.h @@ -10,8 +10,8 @@ #define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_CLIENT_H #include "shared/rpc.h" +#include "shared/rpc_opcodes.h" -#include "include/llvm-libc-types/rpc_opcodes_t.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/config.h" diff --git a/libc/src/__support/common.h b/libc/src/__support/common.h index 79803a346f692..42e8a79187fac 100644 --- a/libc/src/__support/common.h +++ b/libc/src/__support/common.h @@ -21,14 +21,15 @@ #define LLVM_LIBC_FUNCTION_ATTR #endif +// clang-format off // Allow each function `func` to have extra attributes specified by defining: // `LLVM_LIBC_FUNCTION_ATTR_func` macro, which should always start with // "LLVM_LIBC_EMPTY, " // // For examples: // #define LLVM_LIBC_FUNCTION_ATTR_memcpy LLVM_LIBC_EMPTY, [[gnu::weak]] -// #define LLVM_LIBC_FUNCTION_ATTR_memchr LLVM_LIBC_EMPTY, [[gnu::weak]] \ -// [[gnu::visibility("default")]] +// #define LLVM_LIBC_FUNCTION_ATTR_memchr LLVM_LIBC_EMPTY, [[gnu::weak]] [[gnu::visibility("default")]] +// clang-format on #define LLVM_LIBC_EMPTY #define GET_SECOND(first, second, ...) 
second diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 34b0f1424e8fd..a5d17ad023f52 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1,3 +1,5 @@ + + add_entrypoint_object( canonicalize SRCS @@ -5,7 +7,7 @@ add_entrypoint_object( HDRS ../canonicalize.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.basic_operations ) @@ -17,7 +19,7 @@ add_entrypoint_object( HDRS ../canonicalizef.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.basic_operations ) @@ -29,7 +31,7 @@ add_entrypoint_object( HDRS ../canonicalizef16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations @@ -42,7 +44,7 @@ add_entrypoint_object( HDRS ../canonicalizef128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations @@ -55,7 +57,7 @@ add_entrypoint_object( HDRS ../canonicalizel.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.basic_operations ) @@ -67,7 +69,7 @@ add_entrypoint_object( HDRS ../iscanonical.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -77,7 +79,7 @@ add_entrypoint_object( HDRS ../iscanonicalf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -87,7 +89,7 @@ add_entrypoint_object( HDRS ../iscanonicall.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -97,7 +99,7 @@ add_entrypoint_object( HDRS ../iscanonicalf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types ) @@ -109,7 +111,7 @@ add_entrypoint_object( HDRS ../iscanonicalf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types ) @@ -121,7 +123,7 @@ add_entrypoint_object( HDRS ../ceil.h COMPILE_OPTIONS 
- -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -135,7 +137,7 @@ add_entrypoint_object( HDRS ../ceilf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -149,7 +151,7 @@ add_entrypoint_object( HDRS ../ceill.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -161,7 +163,7 @@ add_entrypoint_object( HDRS ../ceilf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.nearest_integer_operations @@ -178,7 +180,7 @@ add_entrypoint_object( HDRS ../ceilf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -191,7 +193,7 @@ add_entrypoint_object( HDRS ../daddl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.generic.add_sub ) @@ -203,7 +205,7 @@ add_entrypoint_object( HDRS ../daddf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub @@ -216,7 +218,7 @@ add_entrypoint_object( HDRS ../ddivl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.generic.div ) @@ -228,7 +230,7 @@ add_entrypoint_object( HDRS ../ddivf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.div @@ -244,7 +246,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fma libc.src.__support.macros.properties.types COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -256,7 +258,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -268,7 +270,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.sqrt 
COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -281,7 +283,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) @@ -295,7 +297,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.generic.add_sub libc.src.__support.macros.properties.types COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -307,7 +309,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_header_library( @@ -392,7 +394,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fp_bits libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -413,7 +415,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -430,7 +432,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -449,7 +451,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -471,7 +473,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -493,7 +495,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -515,7 +517,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -534,7 +536,7 @@ add_entrypoint_object( 
libc.src.__support.common libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -555,7 +557,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -574,7 +576,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -595,7 +597,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -617,7 +619,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -637,7 +639,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -649,7 +651,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -663,7 +665,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -677,7 +679,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -692,7 +694,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.architectures libc.src.__support.macros.properties.compiler COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -707,7 +709,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) 
add_entrypoint_object( @@ -719,7 +721,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -731,7 +733,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -744,7 +746,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.generic.add_sub libc.src.__support.macros.properties.types COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -754,7 +756,7 @@ add_entrypoint_object( HDRS ../trunc.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -768,7 +770,7 @@ add_entrypoint_object( HDRS ../truncf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -782,7 +784,7 @@ add_entrypoint_object( HDRS ../truncl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -794,7 +796,7 @@ add_entrypoint_object( HDRS ../truncf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.nearest_integer_operations @@ -811,7 +813,7 @@ add_entrypoint_object( HDRS ../truncf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -824,7 +826,7 @@ add_entrypoint_object( HDRS ../floor.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -838,7 +840,7 @@ add_entrypoint_object( HDRS ../floorf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -852,7 +854,7 @@ add_entrypoint_object( HDRS ../floorl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ 
-864,7 +866,7 @@ add_entrypoint_object( HDRS ../floorf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.nearest_integer_operations @@ -881,7 +883,7 @@ add_entrypoint_object( HDRS ../floorf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -894,7 +896,7 @@ add_entrypoint_object( HDRS ../round.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -908,7 +910,7 @@ add_entrypoint_object( HDRS ../roundf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -922,7 +924,7 @@ add_entrypoint_object( HDRS ../roundl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -934,7 +936,7 @@ add_entrypoint_object( HDRS ../roundf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.nearest_integer_operations @@ -951,7 +953,7 @@ add_entrypoint_object( HDRS ../roundf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -964,7 +966,7 @@ add_entrypoint_object( HDRS ../roundeven.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -978,7 +980,7 @@ add_entrypoint_object( HDRS ../roundevenf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -992,7 +994,7 @@ add_entrypoint_object( HDRS ../roundevenl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1004,7 +1006,7 @@ add_entrypoint_object( HDRS ../roundevenf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS 
libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1021,7 +1023,7 @@ add_entrypoint_object( HDRS ../roundevenf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1034,7 +1036,7 @@ add_entrypoint_object( HDRS ../lround.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1046,7 +1048,7 @@ add_entrypoint_object( HDRS ../lroundf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1058,7 +1060,7 @@ add_entrypoint_object( HDRS ../lroundl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1070,7 +1072,7 @@ add_entrypoint_object( HDRS ../lroundf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1083,7 +1085,7 @@ add_entrypoint_object( HDRS ../lroundf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1096,7 +1098,7 @@ add_entrypoint_object( HDRS ../llround.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1108,7 +1110,7 @@ add_entrypoint_object( HDRS ../llroundf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1120,7 +1122,7 @@ add_entrypoint_object( HDRS ../llroundl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1132,7 +1134,7 @@ add_entrypoint_object( HDRS ../llroundf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ 
-1145,7 +1147,7 @@ add_entrypoint_object( HDRS ../llroundf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1158,7 +1160,7 @@ add_entrypoint_object( HDRS ../rint.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -1172,7 +1174,7 @@ add_entrypoint_object( HDRS ../rintf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations FLAGS @@ -1186,7 +1188,7 @@ add_entrypoint_object( HDRS ../rintl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1198,7 +1200,7 @@ add_entrypoint_object( HDRS ../rintf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.nearest_integer_operations @@ -1215,7 +1217,7 @@ add_entrypoint_object( HDRS ../rintf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1228,7 +1230,7 @@ add_entrypoint_object( HDRS ../lrint.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1240,7 +1242,7 @@ add_entrypoint_object( HDRS ../lrintf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1252,7 +1254,7 @@ add_entrypoint_object( HDRS ../lrintl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1264,7 +1266,7 @@ add_entrypoint_object( HDRS ../lrintf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1277,7 +1279,7 @@ add_entrypoint_object( HDRS ../lrintf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS 
libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1290,7 +1292,7 @@ add_entrypoint_object( HDRS ../llrint.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1302,7 +1304,7 @@ add_entrypoint_object( HDRS ../llrintf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1314,7 +1316,7 @@ add_entrypoint_object( HDRS ../llrintl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.nearest_integer_operations ) @@ -1326,7 +1328,7 @@ add_entrypoint_object( HDRS ../llrintf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1339,7 +1341,7 @@ add_entrypoint_object( HDRS ../llrintf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations @@ -1354,7 +1356,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1366,7 +1368,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1378,7 +1380,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1391,7 +1393,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1404,7 +1406,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_object_library( @@ 
-1429,7 +1431,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1455,7 +1457,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1476,7 +1478,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1500,7 +1502,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1526,7 +1528,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_header_library( @@ -1556,7 +1558,7 @@ add_entrypoint_object( DEPENDS .exp2f_impl COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1576,7 +1578,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1598,7 +1600,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1622,7 +1624,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1648,7 +1650,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_header_library( @@ -1667,7 +1669,7 @@ add_header_library( libc.src.__support.common libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1679,7 +1681,7 
@@ add_entrypoint_object( DEPENDS .exp10f_impl COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1704,7 +1706,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1725,7 +1727,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1748,7 +1750,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1774,7 +1776,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1795,7 +1797,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1817,7 +1819,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1844,7 +1846,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1867,7 +1869,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1879,7 +1881,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -1893,7 +1895,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ 
-1907,7 +1909,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1920,7 +1922,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -1935,7 +1937,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -1945,7 +1947,7 @@ add_entrypoint_object( HDRS ../frexp.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -1957,7 +1959,7 @@ add_entrypoint_object( HDRS ../frexpf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -1969,7 +1971,7 @@ add_entrypoint_object( HDRS ../frexpl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -1981,7 +1983,7 @@ add_entrypoint_object( HDRS ../frexpf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -1994,7 +1996,7 @@ add_entrypoint_object( HDRS ../frexpf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2007,7 +2009,7 @@ add_entrypoint_object( HDRS ../ilogb.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2019,7 +2021,7 @@ add_entrypoint_object( HDRS ../ilogbf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2031,7 +2033,7 @@ add_entrypoint_object( HDRS ../ilogbl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS 
libc.src.__support.FPUtil.manipulation_functions ) @@ -2043,7 +2045,7 @@ add_entrypoint_object( HDRS ../ilogbf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2056,7 +2058,7 @@ add_entrypoint_object( HDRS ../ilogbf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2069,7 +2071,7 @@ add_entrypoint_object( HDRS ../llogb.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2081,7 +2083,7 @@ add_entrypoint_object( HDRS ../llogbf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2093,7 +2095,7 @@ add_entrypoint_object( HDRS ../llogbl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2105,7 +2107,7 @@ add_entrypoint_object( HDRS ../llogbf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2118,7 +2120,7 @@ add_entrypoint_object( HDRS ../llogbf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2131,7 +2133,7 @@ add_entrypoint_object( HDRS ../ldexp.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2143,7 +2145,7 @@ add_entrypoint_object( HDRS ../ldexpf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2155,7 +2157,7 @@ add_entrypoint_object( HDRS ../ldexpl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2167,7 +2169,7 @@ add_entrypoint_object( HDRS ../ldexpf16.h COMPILE_OPTIONS - -O3 + 
${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2180,7 +2182,7 @@ add_entrypoint_object( HDRS ../ldexpf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2225,7 +2227,7 @@ add_entrypoint_object( libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2242,7 +2244,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fma libc.src.__support.FPUtil.polyeval COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2264,7 +2266,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2284,7 +2286,7 @@ add_entrypoint_object( libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2302,7 +2304,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2323,7 +2325,7 @@ add_entrypoint_object( libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2341,7 +2343,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2363,7 +2365,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2384,7 +2386,7 @@ add_entrypoint_object( libc.src.__support.integer_literals libc.src.__support.macros.optimization 
COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2402,7 +2404,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2424,7 +2426,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2434,7 +2436,7 @@ add_entrypoint_object( HDRS ../logb.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2446,7 +2448,7 @@ add_entrypoint_object( HDRS ../logbf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2458,7 +2460,7 @@ add_entrypoint_object( HDRS ../logbl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.manipulation_functions ) @@ -2470,7 +2472,7 @@ add_entrypoint_object( HDRS ../logbf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2483,7 +2485,7 @@ add_entrypoint_object( HDRS ../logbf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions @@ -2498,7 +2500,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2510,7 +2512,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2522,7 +2524,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2535,7 +2537,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types 
libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2548,7 +2550,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2560,7 +2562,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2574,7 +2576,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2588,7 +2590,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2601,7 +2603,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2614,7 +2616,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2629,7 +2631,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2643,7 +2645,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2657,7 +2659,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2670,7 +2672,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2683,7 +2685,7 @@ 
add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2697,7 +2699,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2711,7 +2713,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2725,7 +2727,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2738,7 +2740,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2753,7 +2755,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2765,7 +2767,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2779,7 +2781,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2793,7 +2795,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2806,7 +2808,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2821,7 +2823,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + 
${libc_opt_high_flag} ) add_entrypoint_object( @@ -2870,7 +2872,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2883,7 +2885,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2932,7 +2934,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2945,7 +2947,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2957,7 +2959,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2971,7 +2973,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -2985,7 +2987,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -2998,7 +3000,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -3013,7 +3015,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3025,7 +3027,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -3039,7 +3041,7 @@ add_entrypoint_object( 
DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -3053,7 +3055,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3066,7 +3068,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} FLAGS MISC_MATH_BASIC_OPS_OPT ) @@ -3081,7 +3083,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3130,7 +3132,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3143,7 +3145,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3192,7 +3194,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3205,7 +3207,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3219,7 +3221,7 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.src.__support.FPUtil.double_double COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3231,7 +3233,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3244,7 +3246,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - 
-O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3256,7 +3258,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3268,7 +3270,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3281,7 +3283,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3293,7 +3295,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) @@ -3306,7 +3308,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3318,7 +3320,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3330,7 +3332,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3343,7 +3345,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3355,7 +3357,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3367,7 +3369,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3379,7 +3381,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3391,7 +3393,7 @@ add_entrypoint_object( DEPENDS 
libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3404,7 +3406,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3416,7 +3418,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3428,7 +3430,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3440,7 +3442,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3453,7 +3455,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3466,7 +3468,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3483,7 +3485,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3493,7 +3495,7 @@ add_entrypoint_object( HDRS ../fdim.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.basic_operations ) @@ -3505,7 +3507,7 @@ add_entrypoint_object( HDRS ../fdimf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.basic_operations ) @@ -3517,7 +3519,7 @@ add_entrypoint_object( HDRS ../fdiml.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS 
libc.src.__support.FPUtil.basic_operations ) @@ -3529,7 +3531,7 @@ add_entrypoint_object( HDRS ../fdimf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations @@ -3542,7 +3544,7 @@ add_entrypoint_object( HDRS ../fdimf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations @@ -3555,7 +3557,7 @@ add_entrypoint_object( HDRS ../fdiv.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.generic.div ) @@ -3567,7 +3569,7 @@ add_entrypoint_object( HDRS ../fdivl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.generic.div ) @@ -3579,7 +3581,7 @@ add_entrypoint_object( HDRS ../fdivf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.div @@ -3592,7 +3594,7 @@ add_entrypoint_object( HDRS ../ffma.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.fma ) @@ -3604,7 +3606,7 @@ add_entrypoint_object( HDRS ../ffmal.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.FPUtil.fma ) @@ -3616,7 +3618,7 @@ add_entrypoint_object( HDRS ../ffmaf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types libc.src.__support.FPUtil.fma @@ -3631,7 +3633,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.hypot COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3641,7 +3643,7 @@ add_entrypoint_object( HDRS ../issignaling.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3651,7 +3653,7 @@ add_entrypoint_object( HDRS ../issignalingf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3661,7 +3663,7 @@ add_entrypoint_object( HDRS ../issignalingl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) 
add_entrypoint_object( @@ -3671,7 +3673,7 @@ add_entrypoint_object( HDRS ../issignalingf16.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types ) @@ -3683,7 +3685,7 @@ add_entrypoint_object( HDRS ../issignalingf128.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.src.__support.macros.properties.types ) @@ -3695,7 +3697,7 @@ add_entrypoint_object( HDRS ../isnan.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3705,7 +3707,7 @@ add_entrypoint_object( HDRS ../isnanf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3715,7 +3717,7 @@ add_entrypoint_object( HDRS ../isnanl.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3728,7 +3730,7 @@ add_entrypoint_object( libc.src.__support.str_to_float libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3741,7 +3743,7 @@ add_entrypoint_object( libc.src.__support.str_to_float libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3754,7 +3756,7 @@ add_entrypoint_object( libc.src.__support.str_to_float libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3767,7 +3769,7 @@ add_entrypoint_object( libc.src.__support.str_to_float libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3780,7 +3782,7 @@ add_entrypoint_object( libc.src.__support.str_to_float libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3792,7 +3794,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3804,7 +3806,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3816,7 +3818,7 @@ add_entrypoint_object( 
DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3829,7 +3831,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3842,7 +3844,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3854,7 +3856,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3866,7 +3868,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3878,7 +3880,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3891,7 +3893,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3903,7 +3905,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3915,7 +3917,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3927,7 +3929,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3940,7 +3942,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( 
@@ -3953,7 +3955,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3965,7 +3967,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3977,7 +3979,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -3989,7 +3991,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4002,7 +4004,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4015,7 +4017,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4027,7 +4029,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.fmod COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4039,7 +4041,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.fmod COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4051,7 +4053,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.fmod COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4064,7 +4066,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.fmod COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4077,7 +4079,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.fmod COMPILE_OPTIONS - -O3 + 
${libc_opt_high_flag} ) add_entrypoint_object( @@ -4089,7 +4091,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4101,7 +4103,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4113,7 +4115,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4126,7 +4128,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4139,7 +4141,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4151,7 +4153,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4163,7 +4165,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4175,7 +4177,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4188,7 +4190,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4201,7 +4203,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4213,7 +4215,7 @@ 
add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4225,7 +4227,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4237,7 +4239,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4250,7 +4252,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4263,7 +4265,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4275,7 +4277,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4287,7 +4289,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4299,7 +4301,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4312,7 +4314,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4325,7 +4327,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.nearest_integer_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) #TODO: Add errno include to the hyperbolic functions. 
@@ -4349,7 +4351,7 @@ add_object_library( libc.src.__support.common libc.src.errno.errno COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4365,7 +4367,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4384,7 +4386,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4399,7 +4401,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4418,7 +4420,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4435,7 +4437,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4458,7 +4460,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4476,7 +4478,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4493,7 +4495,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4507,7 +4509,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fp_bits libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_object_library( @@ -4537,7 +4539,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization .inv_trigf_utils COMPILE_OPTIONS 
- -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4555,7 +4557,7 @@ add_entrypoint_object( libc.src.__support.macros.optimization .inv_trigf_utils COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4574,7 +4576,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4584,7 +4586,7 @@ add_entrypoint_object( HDRS ../atan2f.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS .inv_trigf_utils libc.src.__support.FPUtil.fp_bits @@ -4602,7 +4604,7 @@ add_entrypoint_object( HDRS ../atan2.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS .inv_trigf_utils libc.src.__support.FPUtil.double_double @@ -4622,7 +4624,7 @@ add_entrypoint_object( HDRS ../atan2l.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS .atan2 ) @@ -4637,7 +4639,7 @@ add_entrypoint_object( libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4650,7 +4652,7 @@ add_entrypoint_object( libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4663,7 +4665,7 @@ add_entrypoint_object( libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4677,7 +4679,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4691,7 +4693,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4704,7 +4706,7 @@ add_entrypoint_object( libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - 
-O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4717,7 +4719,7 @@ add_entrypoint_object( libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4730,7 +4732,7 @@ add_entrypoint_object( libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4744,7 +4746,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4758,7 +4760,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4770,7 +4772,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4782,7 +4784,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4794,7 +4796,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4806,7 +4808,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4818,7 +4820,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4830,7 +4832,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4843,7 +4845,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.basic_operations libc.src.__support.macros.properties.types COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) 
add_entrypoint_object( totalordermag @@ -4854,7 +4856,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4866,7 +4868,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4878,7 +4880,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4890,7 +4892,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4903,7 +4905,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.basic_operations libc.src.__support.macros.properties.types COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4915,7 +4917,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4927,7 +4929,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4939,7 +4941,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4952,7 +4954,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4965,7 +4967,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4977,7 +4979,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -4989,7 +4991,7 
@@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5001,7 +5003,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5014,7 +5016,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5027,7 +5029,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5039,7 +5041,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5051,7 +5053,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5063,7 +5065,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5076,7 +5078,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5089,7 +5091,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.basic_operations COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5102,7 +5104,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5115,7 +5117,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + 
${libc_opt_high_flag} ) add_entrypoint_object( @@ -5128,7 +5130,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5141,7 +5143,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5154,7 +5156,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5167,7 +5169,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5180,7 +5182,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5193,7 +5195,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.add_sub COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5206,7 +5208,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.div COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5219,7 +5221,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.div COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5232,7 +5234,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.div COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5245,7 +5247,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.div COMPILE_OPTIONS - -O3 + 
${libc_opt_high_flag} ) add_entrypoint_object( @@ -5258,7 +5260,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5271,7 +5273,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5284,7 +5286,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5297,7 +5299,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.fma COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5310,7 +5312,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5323,7 +5325,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5336,7 +5338,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5349,7 +5351,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5361,7 +5363,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5373,7 +5375,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5386,7 +5388,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types 
libc.src.__support.FPUtil.generic.sqrt COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5396,7 +5398,7 @@ add_entrypoint_object( HDRS ../cbrtf.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl @@ -5412,7 +5414,7 @@ add_entrypoint_object( HDRS ../cbrt.h COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} DEPENDS libc.hdr.fenv_macros libc.src.__support.FPUtil.double_double @@ -5434,7 +5436,7 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5447,7 +5449,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5460,7 +5462,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5473,7 +5475,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5486,7 +5488,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_entrypoint_object( @@ -5499,7 +5501,7 @@ add_entrypoint_object( libc.src.__support.macros.properties.types libc.src.__support.FPUtil.generic.mul COMPILE_OPTIONS - -O3 + ${libc_opt_high_flag} ) add_header_library( diff --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h index ab694e25fe0fe..309610e4ad630 100644 --- a/libc/src/string/memory_utils/op_x86.h +++ b/libc/src/string/memory_utils/op_x86.h @@ -29,12 +29,15 @@ // Define fake functions to prevent the compiler from failing on undefined // functions in case the CPU extension is not present. 
#if !defined(__AVX512BW__) && (defined(_MSC_VER) || defined(__SCE__)) +#undef _mm512_cmpneq_epi8_mask #define _mm512_cmpneq_epi8_mask(A, B) 0 #endif #if !defined(__AVX2__) && (defined(_MSC_VER) || defined(__SCE__)) +#undef _mm256_movemask_epi8 #define _mm256_movemask_epi8(A) 0 #endif #if !defined(__SSE2__) && (defined(_MSC_VER) || defined(__SCE__)) +#undef _mm_movemask_epi8 #define _mm_movemask_epi8(A) 0 #endif diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h index 8be8c0d5f8553..497f8114728bf 100644 --- a/libc/utils/gpu/loader/Loader.h +++ b/libc/utils/gpu/loader/Loader.h @@ -11,9 +11,11 @@ #include "utils/gpu/server/llvmlibc_rpc_server.h" -#include "include/llvm-libc-types/rpc_opcodes_t.h" #include "include/llvm-libc-types/test_rpc_opcodes_t.h" +#include "shared/rpc.h" +#include "shared/rpc_opcodes.h" + #include #include #include @@ -103,129 +105,95 @@ inline void handle_error_impl(const char *file, int32_t line, const char *msg) { fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg); exit(EXIT_FAILURE); } - -inline void handle_error_impl(const char *file, int32_t line, - rpc_status_t err) { - fprintf(stderr, "%s:%d:0: Error: %d\n", file, line, err); - exit(EXIT_FAILURE); -} #define handle_error(X) handle_error_impl(__FILE__, __LINE__, X) -template -inline void register_rpc_callbacks(rpc_device_t device) { - static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size"); - // Register the ping test for the `libc` tests. - rpc_register_callback( - device, static_cast(RPC_TEST_INCREMENT), - [](rpc_port_t port, void *data) { - rpc_recv_and_send( - port, - [](rpc_buffer_t *buffer, void *data) { - reinterpret_cast(buffer->data)[0] += 1; - }, - data); - }, - nullptr); - - // Register the interface test callbacks. 
- rpc_register_callback( - device, static_cast(RPC_TEST_INTERFACE), - [](rpc_port_t port, void *data) { - uint64_t cnt = 0; - bool end_with_recv; - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &end_with_recv); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - if (end_with_recv) - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - else - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - }, - nullptr); - - // Register the stream test handler. 
- rpc_register_callback( - device, static_cast(RPC_TEST_STREAM), - [](rpc_port_t port, void *data) { - uint64_t sizes[lane_size] = {0}; - void *dst[lane_size] = {nullptr}; - rpc_recv_n( - port, dst, sizes, - [](uint64_t size, void *) -> void * { return new char[size]; }, - nullptr); - rpc_send_n(port, dst, sizes); - for (uint64_t i = 0; i < lane_size; ++i) { - if (dst[i]) - delete[] reinterpret_cast(dst[i]); - } - }, - nullptr); +template +inline uint32_t handle_server(rpc::Server &server, uint32_t index, + Alloc &&alloc, Free &&free) { + auto port = server.try_open(num_lanes, index); + if (!port) + return 0; + index = port->get_index() + 1; + + int status = rpc::SUCCESS; + switch (port->get_opcode()) { + case RPC_TEST_INCREMENT: { + port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + reinterpret_cast(buffer->data)[0] += 1; + }); + break; + } + case RPC_TEST_INTERFACE: { + bool end_with_recv; + uint64_t cnt; + port->recv([&](rpc::Buffer *buffer, uint32_t) { + end_with_recv = buffer->data[0]; + }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + if (end_with_recv) + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + else + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + + break; + } + case RPC_TEST_STREAM: { + uint64_t sizes[num_lanes] = {0}; + void *dst[num_lanes] = {nullptr}; 
+ port->recv_n(dst, sizes, + [](uint64_t size) -> void * { return new char[size]; }); + port->send_n(dst, sizes); + for (uint64_t i = 0; i < num_lanes; ++i) { + if (dst[i]) + delete[] reinterpret_cast(dst[i]); + } + break; + } + case RPC_TEST_NOOP: { + port->recv([&](rpc::Buffer *, uint32_t) {}); + break; + } + case RPC_MALLOC: { + port->recv_and_send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = reinterpret_cast(alloc(buffer->data[0])); + }); + break; + } + case RPC_FREE: { + port->recv([&](rpc::Buffer *buffer, uint32_t) { + free(reinterpret_cast(buffer->data[0])); + }); + break; + } + default: + status = libc_handle_rpc_port(&*port, num_lanes); + break; + } + + // Handle all of the `libc` specific opcodes. + if (status != rpc::SUCCESS) + handle_error("Error handling RPC server"); + + port->close(); + + return index; } #endif diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp index d825a6299263a..13a1366833547 100644 --- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp @@ -160,7 +160,7 @@ template hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, hsa_amd_memory_pool_t kernargs_pool, hsa_amd_memory_pool_t coarsegrained_pool, - hsa_queue_t *queue, rpc_device_t device, + hsa_queue_t *queue, rpc::Server &server, const LaunchParameters ¶ms, const char *kernel_name, args_t kernel_args, bool print_resource_usage) { @@ -170,37 +170,10 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, executable, kernel_name, &dev_agent, &symbol)) return err; - // Register RPC callbacks for the malloc and free functions on HSA. 
- auto tuple = std::make_tuple(dev_agent, coarsegrained_pool); - rpc_register_callback( - device, RPC_MALLOC, - [](rpc_port_t port, void *data) { - auto malloc_handler = [](rpc_buffer_t *buffer, void *data) -> void { - auto &[dev_agent, pool] = *static_cast(data); - uint64_t size = buffer->data[0]; - void *dev_ptr = nullptr; - if (hsa_status_t err = - hsa_amd_memory_pool_allocate(pool, size, - /*flags=*/0, &dev_ptr)) - dev_ptr = nullptr; - hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr); - buffer->data[0] = reinterpret_cast(dev_ptr); - }; - rpc_recv_and_send(port, malloc_handler, data); - }, - &tuple); - rpc_register_callback( - device, RPC_FREE, - [](rpc_port_t port, void *data) { - auto free_handler = [](rpc_buffer_t *buffer, void *) { - if (hsa_status_t err = hsa_amd_memory_pool_free( - reinterpret_cast(buffer->data[0]))) - handle_error(err); - }; - rpc_recv_and_send(port, free_handler, data); - }, - nullptr); - + uint32_t wavefront_size = 0; + if (hsa_status_t err = hsa_agent_get_info( + dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size)) + handle_error(err); // Retrieve different properties of the kernel symbol used for launch. uint64_t kernel; uint32_t args_size; @@ -292,14 +265,38 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, hsa_signal_store_relaxed(queue->doorbell_signal, packet_id); std::atomic finished = false; - std::thread server( - [](std::atomic *finished, rpc_device_t device) { - while (!*finished) { - if (rpc_status_t err = rpc_handle_server(device)) + std::thread server_thread( + [](std::atomic *finished, rpc::Server *server, + uint32_t wavefront_size, hsa_agent_t dev_agent, + hsa_amd_memory_pool_t coarsegrained_pool) { + // Register RPC callbacks for the malloc and free functions on HSA. 
+ auto malloc_handler = [&](size_t size) -> void * { + void *dev_ptr = nullptr; + if (hsa_status_t err = + hsa_amd_memory_pool_allocate(coarsegrained_pool, size, + /*flags=*/0, &dev_ptr)) + dev_ptr = nullptr; + hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr); + return dev_ptr; + }; + + auto free_handler = [](void *ptr) -> void { + if (hsa_status_t err = + hsa_amd_memory_pool_free(reinterpret_cast(ptr))) handle_error(err); + }; + + uint32_t index = 0; + while (!*finished) { + if (wavefront_size == 32) + index = + handle_server<32>(*server, index, malloc_handler, free_handler); + else + index = + handle_server<64>(*server, index, malloc_handler, free_handler); } }, - &finished, device); + &finished, &server, wavefront_size, dev_agent, coarsegrained_pool); // Wait until the kernel has completed execution on the device. Periodically // check the RPC client for work to be performed on the server. @@ -309,8 +306,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, ; finished = true; - if (server.joinable()) - server.join(); + if (server_thread.joinable()) + server_thread.join(); // Destroy the resources acquired to launch the kernel and return. if (hsa_status_t err = hsa_amd_memory_pool_free(args)) @@ -457,34 +454,22 @@ int load(int argc, const char **argv, const char **envp, void *image, handle_error(err); // Set up the RPC server. 
- auto tuple = std::make_tuple(dev_agent, finegrained_pool); - auto rpc_alloc = [](uint64_t size, void *data) { - auto &[dev_agent, finegrained_pool] = *static_cast(data); - void *dev_ptr = nullptr; - if (hsa_status_t err = hsa_amd_memory_pool_allocate(finegrained_pool, size, - /*flags=*/0, &dev_ptr)) - handle_error(err); - hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr); - return dev_ptr; - }; - rpc_device_t device; - if (rpc_status_t err = rpc_server_init(&device, RPC_MAXIMUM_PORT_COUNT, - wavefront_size, rpc_alloc, &tuple)) + void *rpc_buffer; + if (hsa_status_t err = hsa_amd_memory_pool_allocate( + finegrained_pool, + rpc::Server::allocation_size(wavefront_size, rpc::MAX_PORT_COUNT), + /*flags=*/0, &rpc_buffer)) handle_error(err); + hsa_amd_agents_allow_access(1, &dev_agent, nullptr, rpc_buffer); - // Register callbacks for the RPC unit tests. - if (wavefront_size == 32) - register_rpc_callbacks<32>(device); - else if (wavefront_size == 64) - register_rpc_callbacks<64>(device); - else - handle_error("Invalid wavefront size"); + rpc::Server server(rpc::MAX_PORT_COUNT, rpc_buffer); + rpc::Client client(rpc::MAX_PORT_COUNT, rpc_buffer); // Initialize the RPC client on the device by copying the local data to the // device's internal pointer. hsa_executable_symbol_t rpc_client_sym; if (hsa_status_t err = hsa_executable_get_symbol_by_name( - executable, rpc_client_symbol_name, &dev_agent, &rpc_client_sym)) + executable, "__llvm_libc_rpc_client", &dev_agent, &rpc_client_sym)) handle_error(err); void *rpc_client_host; @@ -507,19 +492,17 @@ int load(int argc, const char **argv, const char **envp, void *image, void *rpc_client_buffer; if (hsa_status_t err = - hsa_amd_memory_lock(const_cast(rpc_get_client_buffer(device)), - rpc_get_client_size(), + hsa_amd_memory_lock(&client, sizeof(rpc::Client), /*agents=*/nullptr, 0, &rpc_client_buffer)) handle_error(err); // Copy the RPC client buffer to the address pointed to by the symbol. 
if (hsa_status_t err = hsa_memcpy(*reinterpret_cast(rpc_client_host), dev_agent, - rpc_client_buffer, host_agent, rpc_get_client_size())) + rpc_client_buffer, host_agent, sizeof(rpc::Client))) handle_error(err); - if (hsa_status_t err = hsa_amd_memory_unlock( - const_cast(rpc_get_client_buffer(device)))) + if (hsa_status_t err = hsa_amd_memory_unlock(&client)) handle_error(err); if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_client_host)) handle_error(err); @@ -571,7 +554,7 @@ int load(int argc, const char **argv, const char **envp, void *image, LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1}; begin_args_t init_args = {argc, dev_argv, dev_envp}; if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool, - coarsegrained_pool, queue, device, + coarsegrained_pool, queue, server, single_threaded_params, "_begin.kd", init_args, print_resource_usage)) handle_error(err); @@ -579,7 +562,7 @@ int load(int argc, const char **argv, const char **envp, void *image, start_args_t args = {argc, dev_argv, dev_envp, dev_ret}; if (hsa_status_t err = launch_kernel( dev_agent, executable, kernargs_pool, coarsegrained_pool, queue, - device, params, "_start.kd", args, print_resource_usage)) + server, params, "_start.kd", args, print_resource_usage)) handle_error(err); void *host_ret; @@ -598,14 +581,12 @@ int load(int argc, const char **argv, const char **envp, void *image, end_args_t fini_args = {ret}; if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool, - coarsegrained_pool, queue, device, + coarsegrained_pool, queue, server, single_threaded_params, "_end.kd", fini_args, print_resource_usage)) handle_error(err); - if (rpc_status_t err = rpc_server_shutdown( - device, [](void *ptr, void *) { hsa_amd_memory_pool_free(ptr); }, - nullptr)) + if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_buffer)) handle_error(err); // Free the memory allocated for the device. 
diff --git a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp b/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp index 58e5e5f04d0a7..0ba217451feae 100644 --- a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp +++ b/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp @@ -167,10 +167,9 @@ void print_kernel_resources(CUmodule binary, const char *kernel_name) { } template -CUresult launch_kernel(CUmodule binary, CUstream stream, - rpc_device_t rpc_device, const LaunchParameters ¶ms, - const char *kernel_name, args_t kernel_args, - bool print_resource_usage) { +CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server, + const LaunchParameters ¶ms, const char *kernel_name, + args_t kernel_args, bool print_resource_usage) { // look up the '_start' kernel in the loaded module. CUfunction function; if (CUresult err = cuModuleGetFunction(&function, binary, kernel_name)) @@ -181,23 +180,21 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &kernel_args, CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size, CU_LAUNCH_PARAM_END}; + if (print_resource_usage) + print_kernel_resources(binary, kernel_name); - // Initialize a non-blocking CUDA stream to allocate memory if needed. This - // needs to be done on a separate stream or else it will deadlock with the - // executing kernel. + // Initialize a non-blocking CUDA stream to allocate memory if needed. + // This needs to be done on a separate stream or else it will deadlock + // with the executing kernel. CUstream memory_stream; if (CUresult err = cuStreamCreate(&memory_stream, CU_STREAM_NON_BLOCKING)) handle_error(err); - // Register RPC callbacks for the malloc and free functions on HSA. 
- register_rpc_callbacks<32>(rpc_device); - - rpc_register_callback( - rpc_device, RPC_MALLOC, - [](rpc_port_t port, void *data) { - auto malloc_handler = [](rpc_buffer_t *buffer, void *data) -> void { - CUstream memory_stream = *static_cast(data); - uint64_t size = buffer->data[0]; + std::atomic finished = false; + std::thread server_thread( + [](std::atomic *finished, rpc::Server *server, + CUstream memory_stream) { + auto malloc_handler = [&](size_t size) -> void * { CUdeviceptr dev_ptr; if (CUresult err = cuMemAllocAsync(&dev_ptr, size, memory_stream)) dev_ptr = 0UL; @@ -205,36 +202,22 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, // Wait until the memory allocation is complete. while (cuStreamQuery(memory_stream) == CUDA_ERROR_NOT_READY) ; - buffer->data[0] = static_cast(dev_ptr); + return reinterpret_cast(dev_ptr); }; - rpc_recv_and_send(port, malloc_handler, data); - }, - &memory_stream); - rpc_register_callback( - rpc_device, RPC_FREE, - [](rpc_port_t port, void *data) { - auto free_handler = [](rpc_buffer_t *buffer, void *data) { - CUstream memory_stream = *static_cast(data); - if (CUresult err = cuMemFreeAsync( - static_cast(buffer->data[0]), memory_stream)) + + auto free_handler = [&](void *ptr) -> void { + if (CUresult err = cuMemFreeAsync(reinterpret_cast(ptr), + memory_stream)) handle_error(err); }; - rpc_recv_and_send(port, free_handler, data); - }, - &memory_stream); - if (print_resource_usage) - print_kernel_resources(binary, kernel_name); - - std::atomic finished = false; - std::thread server( - [](std::atomic *finished, rpc_device_t device) { + uint32_t index = 0; while (!*finished) { - if (rpc_status_t err = rpc_handle_server(device)) - handle_error(err); + index = + handle_server<32>(*server, index, malloc_handler, free_handler); } }, - &finished, rpc_device); + &finished, &server, memory_stream); // Call the kernel with the given arguments. 
if (CUresult err = cuLaunchKernel( @@ -247,8 +230,8 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, handle_error(err); finished = true; - if (server.joinable()) - server.join(); + if (server_thread.joinable()) + server_thread.join(); return CUDA_SUCCESS; } @@ -318,23 +301,20 @@ int load(int argc, const char **argv, const char **envp, void *image, handle_error(err); uint32_t warp_size = 32; - auto rpc_alloc = [](uint64_t size, void *) -> void * { - void *dev_ptr; - if (CUresult err = cuMemAllocHost(&dev_ptr, size)) - handle_error(err); - return dev_ptr; - }; - rpc_device_t rpc_device; - if (rpc_status_t err = rpc_server_init(&rpc_device, RPC_MAXIMUM_PORT_COUNT, - warp_size, rpc_alloc, nullptr)) + void *rpc_buffer = nullptr; + if (CUresult err = cuMemAllocHost( + &rpc_buffer, + rpc::Server::allocation_size(warp_size, rpc::MAX_PORT_COUNT))) handle_error(err); + rpc::Server server(rpc::MAX_PORT_COUNT, rpc_buffer); + rpc::Client client(rpc::MAX_PORT_COUNT, rpc_buffer); // Initialize the RPC client on the device by copying the local data to the // device's internal pointer. 
CUdeviceptr rpc_client_dev = 0; uint64_t client_ptr_size = sizeof(void *); if (CUresult err = cuModuleGetGlobal(&rpc_client_dev, &client_ptr_size, - binary, rpc_client_symbol_name)) + binary, "__llvm_libc_rpc_client")) handle_error(err); CUdeviceptr rpc_client_host = 0; @@ -342,20 +322,19 @@ int load(int argc, const char **argv, const char **envp, void *image, cuMemcpyDtoH(&rpc_client_host, rpc_client_dev, sizeof(void *))) handle_error(err); if (CUresult err = - cuMemcpyHtoD(rpc_client_host, rpc_get_client_buffer(rpc_device), - rpc_get_client_size())) + cuMemcpyHtoD(rpc_client_host, &client, sizeof(rpc::Client))) handle_error(err); LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1}; begin_args_t init_args = {argc, dev_argv, dev_envp}; if (CUresult err = - launch_kernel(binary, stream, rpc_device, single_threaded_params, + launch_kernel(binary, stream, server, single_threaded_params, "_begin", init_args, print_resource_usage)) handle_error(err); start_args_t args = {argc, dev_argv, dev_envp, reinterpret_cast(dev_ret)}; - if (CUresult err = launch_kernel(binary, stream, rpc_device, params, "_start", + if (CUresult err = launch_kernel(binary, stream, server, params, "_start", args, print_resource_usage)) handle_error(err); @@ -369,8 +348,8 @@ int load(int argc, const char **argv, const char **envp, void *image, end_args_t fini_args = {host_ret}; if (CUresult err = - launch_kernel(binary, stream, rpc_device, single_threaded_params, - "_end", fini_args, print_resource_usage)) + launch_kernel(binary, stream, server, single_threaded_params, "_end", + fini_args, print_resource_usage)) handle_error(err); // Free the memory allocated for the device. 
@@ -380,8 +359,7 @@ int load(int argc, const char **argv, const char **envp, void *image, handle_error(err); if (CUresult err = cuMemFreeHost(dev_argv)) handle_error(err); - if (rpc_status_t err = rpc_server_shutdown( - rpc_device, [](void *ptr, void *) { cuMemFreeHost(ptr); }, nullptr)) + if (CUresult err = cuMemFreeHost(rpc_buffer)) handle_error(err); // Destroy the context and the loaded binary. diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt index 50056fb376b69..36a4c2a8b051e 100644 --- a/libc/utils/gpu/server/CMakeLists.txt +++ b/libc/utils/gpu/server/CMakeLists.txt @@ -26,10 +26,6 @@ target_compile_definitions(llvmlibc_rpc_server PUBLIC install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvmlibc_rpc_server.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT libc-headers) -install(FILES ${LIBC_SOURCE_DIR}/include/llvm-libc-types/rpc_opcodes_t.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - RENAME llvmlibc_rpc_opcodes.h - COMPONENT libc-headers) install(TARGETS llvmlibc_rpc_server ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT libc) diff --git a/libc/utils/gpu/server/llvmlibc_rpc_server.h b/libc/utils/gpu/server/llvmlibc_rpc_server.h index 98df882afa21c..b7f173734345c 100644 --- a/libc/utils/gpu/server/llvmlibc_rpc_server.h +++ b/libc/utils/gpu/server/llvmlibc_rpc_server.h @@ -15,99 +15,7 @@ extern "C" { #endif -/// The maximum number of ports that can be opened for any server. -const uint64_t RPC_MAXIMUM_PORT_COUNT = 4096; - -/// The symbol name associated with the client for use with the LLVM C library -/// implementation. -const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client"; - -/// status codes. -typedef enum { - RPC_STATUS_SUCCESS = 0x0, - RPC_STATUS_CONTINUE = 0x1, - RPC_STATUS_ERROR = 0x1000, - RPC_STATUS_UNHANDLED_OPCODE = 0x1001, - RPC_STATUS_INVALID_LANE_SIZE = 0x1002, -} rpc_status_t; - -/// A struct containing an opaque handle to an RPC port. 
This is what allows the -/// server to communicate with the client. -typedef struct rpc_port_s { - uint64_t handle; - uint32_t lane_size; -} rpc_port_t; - -/// A fixed-size buffer containing the payload sent from the client. -typedef struct rpc_buffer_s { - uint64_t data[8]; -} rpc_buffer_t; - -/// An opaque handle to an RPC server that can be attached to a device. -typedef struct rpc_device_s { - uintptr_t handle; -} rpc_device_t; - -/// A function used to allocate \p bytes for use by the RPC server and client. -/// The memory should support asynchronous and atomic access from both the -/// client and server. -typedef void *(*rpc_alloc_ty)(uint64_t size, void *data); - -/// A function used to free the \p ptr previously allocated. -typedef void (*rpc_free_ty)(void *ptr, void *data); - -/// A callback function provided with a \p port to communicate with the RPC -/// client. This will be called by the server to handle an opcode. -typedef void (*rpc_opcode_callback_ty)(rpc_port_t port, void *data); - -/// A callback function to use the port to receive or send a \p buffer. -typedef void (*rpc_port_callback_ty)(rpc_buffer_t *buffer, void *data); - -/// Initialize the server for a given device and return it in \p device. -rpc_status_t rpc_server_init(rpc_device_t *rpc_device, uint64_t num_ports, - uint32_t lane_size, rpc_alloc_ty alloc, - void *data); - -/// Shut down the server for a given device. -rpc_status_t rpc_server_shutdown(rpc_device_t rpc_device, rpc_free_ty dealloc, - void *data); - -/// Queries the RPC clients at least once and performs server-side work if there -/// are any active requests. Runs until all work on the server is completed. -rpc_status_t rpc_handle_server(rpc_device_t rpc_device); - -/// Register a callback to handle an opcode from the RPC client. The associated -/// data must remain accessible as long as the user intends to handle the server -/// with this callback. 
-rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint32_t opcode, - rpc_opcode_callback_ty callback, void *data); - -/// Obtain a pointer to a local client buffer that can be copied directly to the -/// other process using the address stored at the rpc client symbol name. -const void *rpc_get_client_buffer(rpc_device_t device); - -/// Returns the size of the client in bytes to be used for a memory copy. -uint64_t rpc_get_client_size(); - -/// Use the \p port to send a buffer using the \p callback. -void rpc_send(rpc_port_t port, rpc_port_callback_ty callback, void *data); - -/// Use the \p port to send \p bytes using the \p callback. The input is an -/// array of at least the configured lane size. -void rpc_send_n(rpc_port_t port, const void *const *src, uint64_t *size); - -/// Use the \p port to recieve a buffer using the \p callback. -void rpc_recv(rpc_port_t port, rpc_port_callback_ty callback, void *data); - -/// Use the \p port to recieve \p bytes using the \p callback. The inputs is an -/// array of at least the configured lane size. The \p alloc function allocates -/// memory for the recieved bytes. -void rpc_recv_n(rpc_port_t port, void **dst, uint64_t *size, rpc_alloc_ty alloc, - void *data); - -/// Use the \p port to receive and send a buffer using the \p callback. 
-void rpc_recv_and_send(rpc_port_t port, rpc_port_callback_ty callback, - void *data); +int libc_handle_rpc_port(void *port, uint32_t num_lanes); #ifdef __cplusplus } diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index d877cbc25a13d..21af7ad7d5f1f 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -15,10 +15,10 @@ #include #include "shared/rpc.h" +#include "shared/rpc_opcodes.h" #include "llvmlibc_rpc_server.h" -#include "include/llvm-libc-types/rpc_opcodes_t.h" #include "src/__support/arg_list.h" #include "src/stdio/printf_core/converter.h" #include "src/stdio/printf_core/parser.h" @@ -37,12 +37,6 @@ using namespace LIBC_NAMESPACE; using namespace LIBC_NAMESPACE::printf_core; -static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer), - "Buffer size mismatch"); - -static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT, - "Incorrect maximum port count"); - namespace { struct TempStorage { char *alloc(size_t size) { @@ -74,9 +68,9 @@ LIBC_INLINE ::FILE *to_stream(uintptr_t f) { return stream; } -template +template static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { - FILE *files[lane_size] = {nullptr}; + FILE *files[num_lanes] = {nullptr}; // Get the appropriate output stream to use. 
if (port.get_opcode() == RPC_PRINTF_TO_STREAM || port.get_opcode() == RPC_PRINTF_TO_STREAM_PACKED) @@ -85,22 +79,22 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { }); else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT || port.get_opcode() == RPC_PRINTF_TO_STDOUT_PACKED) - std::fill(files, files + lane_size, stdout); + std::fill(files, files + num_lanes, stdout); else - std::fill(files, files + lane_size, stderr); + std::fill(files, files + num_lanes, stderr); - uint64_t format_sizes[lane_size] = {0}; - void *format[lane_size] = {nullptr}; + uint64_t format_sizes[num_lanes] = {0}; + void *format[num_lanes] = {nullptr}; - uint64_t args_sizes[lane_size] = {0}; - void *args[lane_size] = {nullptr}; + uint64_t args_sizes[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; // Recieve the format string and arguments from the client. port.recv_n(format, format_sizes, [&](uint64_t size) { return temp_storage.alloc(size); }); // Parse the format string to get the expected size of the buffer. - for (uint32_t lane = 0; lane < lane_size; ++lane) { + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!format[lane]) continue; @@ -125,9 +119,9 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { // Identify any arguments that are actually pointers to strings on the client. // Additionally we want to determine how much buffer space we need to print. - std::vector strs_to_copy[lane_size]; - int buffer_size[lane_size] = {0}; - for (uint32_t lane = 0; lane < lane_size; ++lane) { + std::vector strs_to_copy[num_lanes]; + int buffer_size[num_lanes] = {0}; + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!format[lane]) continue; @@ -159,7 +153,7 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { } // Recieve any strings from the client and push them into a buffer. 
- std::vector copied_strs[lane_size]; + std::vector copied_strs[num_lanes]; while (std::any_of(std::begin(strs_to_copy), std::end(strs_to_copy), [](const auto &v) { return !v.empty() && v.back(); })) { port.send([&](rpc::Buffer *buffer, uint32_t id) { @@ -168,11 +162,11 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { if (!strs_to_copy[id].empty()) strs_to_copy[id].pop_back(); }); - uint64_t str_sizes[lane_size] = {0}; - void *strs[lane_size] = {nullptr}; + uint64_t str_sizes[num_lanes] = {0}; + void *strs[num_lanes] = {nullptr}; port.recv_n(strs, str_sizes, [&](uint64_t size) { return temp_storage.alloc(size); }); - for (uint32_t lane = 0; lane < lane_size; ++lane) { + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!strs[lane]) continue; @@ -182,8 +176,8 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { } // Perform the final formatting and printing using the LLVM C library printf. - int results[lane_size] = {0}; - for (uint32_t lane = 0; lane < lane_size; ++lane) { + int results[num_lanes] = {0}; + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!format[lane]) continue; @@ -233,42 +227,34 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { }); } -template -rpc_status_t handle_server_impl( - rpc::Server &server, - const std::unordered_map &callbacks, - const std::unordered_map &callback_data, - uint32_t &index) { - auto port = server.try_open(lane_size, index); - if (!port) - return RPC_STATUS_SUCCESS; - +template +rpc::Status handle_port_impl(rpc::Server::Port &port) { TempStorage temp_storage; - switch (port->get_opcode()) { + switch (port.get_opcode()) { case RPC_WRITE_TO_STREAM: case RPC_WRITE_TO_STDERR: case RPC_WRITE_TO_STDOUT: case RPC_WRITE_TO_STDOUT_NEWLINE: { - uint64_t sizes[lane_size] = {0}; - void *strs[lane_size] = {nullptr}; - FILE *files[lane_size] = {nullptr}; - if (port->get_opcode() == RPC_WRITE_TO_STREAM) { - 
port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *strs[num_lanes] = {nullptr}; + FILE *files[num_lanes] = {nullptr}; + if (port.get_opcode() == RPC_WRITE_TO_STREAM) { + port.recv([&](rpc::Buffer *buffer, uint32_t id) { files[id] = reinterpret_cast(buffer->data[0]); }); - } else if (port->get_opcode() == RPC_WRITE_TO_STDERR) { - std::fill(files, files + lane_size, stderr); + } else if (port.get_opcode() == RPC_WRITE_TO_STDERR) { + std::fill(files, files + num_lanes, stderr); } else { - std::fill(files, files + lane_size, stdout); + std::fill(files, files + num_lanes, stdout); } - port->recv_n(strs, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + port.recv_n(strs, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { flockfile(files[id]); buffer->data[0] = fwrite_unlocked(strs[id], 1, sizes[id], files[id]); - if (port->get_opcode() == RPC_WRITE_TO_STDOUT_NEWLINE && + if (port.get_opcode() == RPC_WRITE_TO_STDOUT_NEWLINE && buffer->data[0] == sizes[id]) buffer->data[0] += fwrite_unlocked("\n", 1, 1, files[id]); funlockfile(files[id]); @@ -276,37 +262,37 @@ rpc_status_t handle_server_impl( break; } case RPC_READ_FROM_STREAM: { - uint64_t sizes[lane_size] = {0}; - void *data[lane_size] = {nullptr}; - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *data[num_lanes] = {nullptr}; + port.recv([&](rpc::Buffer *buffer, uint32_t id) { data[id] = temp_storage.alloc(buffer->data[0]); sizes[id] = fread(data[id], 1, buffer->data[0], to_stream(buffer->data[1])); }); - port->send_n(data, sizes); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + port.send_n(data, sizes); + port.send([&](rpc::Buffer *buffer, uint32_t id) { std::memcpy(buffer->data, &sizes[id], sizeof(uint64_t)); }); break; } case RPC_READ_FGETS: { - uint64_t sizes[lane_size] = {0}; - void 
*data[lane_size] = {nullptr}; - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *data[num_lanes] = {nullptr}; + port.recv([&](rpc::Buffer *buffer, uint32_t id) { data[id] = temp_storage.alloc(buffer->data[0]); const char *str = fgets(reinterpret_cast(data[id]), buffer->data[0], to_stream(buffer->data[1])); sizes[id] = !str ? 0 : std::strlen(str) + 1; }); - port->send_n(data, sizes); + port.send_n(data, sizes); break; } case RPC_OPEN_FILE: { - uint64_t sizes[lane_size] = {0}; - void *paths[lane_size] = {nullptr}; - port->recv_n(paths, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *paths[num_lanes] = {nullptr}; + port.recv_n(paths, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { FILE *file = fopen(reinterpret_cast(paths[id]), reinterpret_cast(buffer->data)); buffer->data[0] = reinterpret_cast(file); @@ -314,7 +300,7 @@ rpc_status_t handle_server_impl( break; } case RPC_CLOSE_FILE: { - port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { + port.recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { FILE *file = reinterpret_cast(buffer->data[0]); buffer->data[0] = fclose(file); }); @@ -322,8 +308,8 @@ rpc_status_t handle_server_impl( } case RPC_EXIT: { // Send a response to the client to signal that we are ready to exit. - port->recv_and_send([](rpc::Buffer *, uint32_t) {}); - port->recv([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *, uint32_t) {}); + port.recv([](rpc::Buffer *buffer, uint32_t) { int status = 0; std::memcpy(&status, buffer->data, sizeof(int)); exit(status); @@ -332,47 +318,47 @@ rpc_status_t handle_server_impl( } case RPC_ABORT: { // Send a response to the client to signal that we are ready to abort. 
- port->recv_and_send([](rpc::Buffer *, uint32_t) {}); - port->recv([](rpc::Buffer *, uint32_t) {}); + port.recv_and_send([](rpc::Buffer *, uint32_t) {}); + port.recv([](rpc::Buffer *, uint32_t) {}); abort(); break; } case RPC_HOST_CALL: { - uint64_t sizes[lane_size] = {0}; - unsigned long long results[lane_size] = {0}; - void *args[lane_size] = {nullptr}; - port->recv_n(args, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + unsigned long long results[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; + port.recv_n(args, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.recv([&](rpc::Buffer *buffer, uint32_t id) { using func_ptr_t = unsigned long long (*)(void *); auto func = reinterpret_cast(buffer->data[0]); results[id] = func(args[id]); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast(results[id]); }); break; } case RPC_FEOF: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = feof(to_stream(buffer->data[0])); }); break; } case RPC_FERROR: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = ferror(to_stream(buffer->data[0])); }); break; } case RPC_CLEARERR: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { clearerr(to_stream(buffer->data[0])); }); break; } case RPC_FSEEK: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = fseek(to_stream(buffer->data[0]), static_cast(buffer->data[1]), static_cast(buffer->data[2])); @@ -380,19 +366,19 @@ rpc_status_t handle_server_impl( break; } case RPC_FTELL: { - 
port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = ftell(to_stream(buffer->data[0])); }); break; } case RPC_FFLUSH: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = fflush(to_stream(buffer->data[0])); }); break; } case RPC_UNGETC: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = ungetc(static_cast(buffer->data[0]), to_stream(buffer->data[1])); }); @@ -401,36 +387,36 @@ rpc_status_t handle_server_impl( case RPC_PRINTF_TO_STREAM_PACKED: case RPC_PRINTF_TO_STDOUT_PACKED: case RPC_PRINTF_TO_STDERR_PACKED: { - handle_printf(*port, temp_storage); + handle_printf(port, temp_storage); break; } case RPC_PRINTF_TO_STREAM: case RPC_PRINTF_TO_STDOUT: case RPC_PRINTF_TO_STDERR: { - handle_printf(*port, temp_storage); + handle_printf(port, temp_storage); break; } case RPC_REMOVE: { - uint64_t sizes[lane_size] = {0}; - void *args[lane_size] = {nullptr}; - port->recv_n(args, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; + port.recv_n(args, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast( remove(reinterpret_cast(args[id]))); }); break; } case RPC_RENAME: { - uint64_t oldsizes[lane_size] = {0}; - uint64_t newsizes[lane_size] = {0}; - void *oldpath[lane_size] = {nullptr}; - void *newpath[lane_size] = {nullptr}; - port->recv_n(oldpath, oldsizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->recv_n(newpath, newsizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t oldsizes[num_lanes] = {0}; + 
uint64_t newsizes[num_lanes] = {0}; + void *oldpath[num_lanes] = {nullptr}; + void *newpath[num_lanes] = {nullptr}; + port.recv_n(oldpath, oldsizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.recv_n(newpath, newsizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast( rename(reinterpret_cast(oldpath[id]), reinterpret_cast(newpath[id]))); @@ -438,168 +424,36 @@ rpc_status_t handle_server_impl( break; } case RPC_SYSTEM: { - uint64_t sizes[lane_size] = {0}; - void *args[lane_size] = {nullptr}; - port->recv_n(args, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; + port.recv_n(args, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast( system(reinterpret_cast(args[id]))); }); break; } case RPC_NOOP: { - port->recv([](rpc::Buffer *, uint32_t) {}); + port.recv([](rpc::Buffer *, uint32_t) {}); break; } - default: { - auto handler = - callbacks.find(static_cast(port->get_opcode())); - - // We error out on an unhandled opcode. - if (handler == callbacks.end()) - return RPC_STATUS_UNHANDLED_OPCODE; - - // Invoke the registered callback with a reference to the port. - void *data = - callback_data.at(static_cast(port->get_opcode())); - rpc_port_t port_ref{reinterpret_cast(&*port), lane_size}; - (handler->second)(port_ref, data); + default: + return rpc::UNHANDLED_OPCODE; } - } - - // Increment the index so we start the scan after this port. 
- index = port->get_index() + 1; - port->close(); - return RPC_STATUS_CONTINUE; + return rpc::SUCCESS; } -struct Device { - Device(uint32_t lane_size, uint32_t num_ports, void *buffer) - : lane_size(lane_size), buffer(buffer), server(num_ports, buffer), - client(num_ports, buffer) {} - - rpc_status_t handle_server(uint32_t &index) { - switch (lane_size) { - case 1: - return handle_server_impl<1>(server, callbacks, callback_data, index); - case 32: - return handle_server_impl<32>(server, callbacks, callback_data, index); - case 64: - return handle_server_impl<64>(server, callbacks, callback_data, index); - default: - return RPC_STATUS_INVALID_LANE_SIZE; - } +int libc_handle_rpc_port(void *port, uint32_t num_lanes) { + switch (num_lanes) { + case 1: + return handle_port_impl<1>(*reinterpret_cast(port)); + case 32: + return handle_port_impl<32>(*reinterpret_cast(port)); + case 64: + return handle_port_impl<64>(*reinterpret_cast(port)); + default: + return rpc::ERROR; } - - uint32_t lane_size; - void *buffer; - rpc::Server server; - rpc::Client client; - std::unordered_map callbacks; - std::unordered_map callback_data; -}; - -rpc_status_t rpc_server_init(rpc_device_t *rpc_device, uint64_t num_ports, - uint32_t lane_size, rpc_alloc_ty alloc, - void *data) { - if (!rpc_device) - return RPC_STATUS_ERROR; - if (lane_size != 1 && lane_size != 32 && lane_size != 64) - return RPC_STATUS_INVALID_LANE_SIZE; - - uint64_t size = rpc::Server::allocation_size(lane_size, num_ports); - void *buffer = alloc(size, data); - - if (!buffer) - return RPC_STATUS_ERROR; - - Device *device = new Device(lane_size, num_ports, buffer); - if (!device) - return RPC_STATUS_ERROR; - - rpc_device->handle = reinterpret_cast(device); - return RPC_STATUS_SUCCESS; -} - -rpc_status_t rpc_server_shutdown(rpc_device_t rpc_device, rpc_free_ty dealloc, - void *data) { - if (!rpc_device.handle) - return RPC_STATUS_ERROR; - - Device *device = reinterpret_cast(rpc_device.handle); - dealloc(device->buffer, data); 
- delete device; - - return RPC_STATUS_SUCCESS; -} - -rpc_status_t rpc_handle_server(rpc_device_t rpc_device) { - if (!rpc_device.handle) - return RPC_STATUS_ERROR; - - Device *device = reinterpret_cast(rpc_device.handle); - uint32_t index = 0; - for (;;) { - rpc_status_t status = device->handle_server(index); - if (status != RPC_STATUS_CONTINUE) - return status; - } -} - -rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint32_t opcode, - rpc_opcode_callback_ty callback, - void *data) { - if (!rpc_device.handle) - return RPC_STATUS_ERROR; - - Device *device = reinterpret_cast(rpc_device.handle); - - device->callbacks[opcode] = callback; - device->callback_data[opcode] = data; - return RPC_STATUS_SUCCESS; -} - -const void *rpc_get_client_buffer(rpc_device_t rpc_device) { - if (!rpc_device.handle) - return nullptr; - Device *device = reinterpret_cast(rpc_device.handle); - return &device->client; -} - -uint64_t rpc_get_client_size() { return sizeof(rpc::Client); } - -void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = reinterpret_cast(ref.handle); - port->send([=](rpc::Buffer *buffer, uint32_t) { - callback(reinterpret_cast(buffer), data); - }); -} - -void rpc_send_n(rpc_port_t ref, const void *const *src, uint64_t *size) { - auto port = reinterpret_cast(ref.handle); - port->send_n(src, size); -} - -void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = reinterpret_cast(ref.handle); - port->recv([=](rpc::Buffer *buffer, uint32_t) { - callback(reinterpret_cast(buffer), data); - }); -} - -void rpc_recv_n(rpc_port_t ref, void **dst, uint64_t *size, rpc_alloc_ty alloc, - void *data) { - auto port = reinterpret_cast(ref.handle); - auto alloc_fn = [=](uint64_t size) { return alloc(size, data); }; - port->recv_n(dst, size, alloc_fn); -} - -void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback, - void *data) { - auto port = reinterpret_cast(ref.handle); - 
port->recv_and_send([=](rpc::Buffer *buffer, uint32_t) { - callback(reinterpret_cast(buffer), data); - }); } diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 9039c6f046445..d520c46bae1ef 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -102,6 +102,9 @@ Deprecations and Removals headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to suppress deprecation for these headers. +- The ``_LIBCPP_DISABLE_AVAILABILITY`` macro that was used to force-disable availability markup has now been removed. + Whether availability markup is used by the library is now solely controlled at configuration-time. + Upcoming Deprecations and Removals ---------------------------------- diff --git a/libcxx/include/__chrono/duration.h b/libcxx/include/__chrono/duration.h index a401178b2a75c..941aca6009599 100644 --- a/libcxx/include/__chrono/duration.h +++ b/libcxx/include/__chrono/duration.h @@ -542,8 +542,4 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -#endif - #endif // _LIBCPP___CHRONO_DURATION_H diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h index d805c5a4d978d..efda2a04a4841 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -67,13 +67,6 @@ // // [1]: https://clang.llvm.org/docs/AttributeReference.html#availability -// For backwards compatibility, allow users to define _LIBCPP_DISABLE_AVAILABILITY -// for a while. -#if defined(_LIBCPP_DISABLE_AVAILABILITY) -# undef _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS -# define _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS 0 -#endif - // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. 
#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) diff --git a/libcxx/include/__memory_resource/synchronized_pool_resource.h b/libcxx/include/__memory_resource/synchronized_pool_resource.h index 6384564afc917..bcc1ac4a172e3 100644 --- a/libcxx/include/__memory_resource/synchronized_pool_resource.h +++ b/libcxx/include/__memory_resource/synchronized_pool_resource.h @@ -10,10 +10,12 @@ #define _LIBCPP___MEMORY_RESOURCE_SYNCHRONIZED_POOL_RESOURCE_H #include <__config> +#include <__cstddef/size_t.h> #include <__memory_resource/memory_resource.h> #include <__memory_resource/pool_options.h> #include <__memory_resource/unsynchronized_pool_resource.h> -#include +#include <__mutex/mutex.h> +#include <__mutex/unique_lock.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/future b/libcxx/include/future index cbf3ed9346417..5b2e9eed88e35 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -368,6 +368,7 @@ template struct uses_allocator, Alloc>; # include <__assert> # include <__chrono/duration.h> +# include <__chrono/steady_clock.h> # include <__chrono/time_point.h> # include <__condition_variable/condition_variable.h> # include <__exception/exception_ptr.h> @@ -381,6 +382,9 @@ template struct uses_allocator, Alloc>; # include <__memory/shared_count.h> # include <__memory/unique_ptr.h> # include <__memory/uses_allocator.h> +# include <__mutex/lock_guard.h> +# include <__mutex/mutex.h> +# include <__mutex/unique_lock.h> # include <__system_error/error_category.h> # include <__system_error/error_code.h> # include <__system_error/error_condition.h> @@ -390,14 +394,19 @@ template struct uses_allocator, Alloc>; # include <__type_traits/conditional.h> # include <__type_traits/decay.h> # include <__type_traits/enable_if.h> +# include <__type_traits/invoke.h> +# include <__type_traits/is_same.h> +# include <__type_traits/remove_cvref.h> +# 
include <__type_traits/remove_reference.h> # include <__type_traits/strip_signature.h> # include <__type_traits/underlying_type.h> # include <__utility/auto_cast.h> # include <__utility/forward.h> # include <__utility/move.h> -# include +# include <__utility/swap.h> # include # include +# include # include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/memory_resource b/libcxx/include/memory_resource index 7de69e67b7c06..e54b7e6e2473f 100644 --- a/libcxx/include/memory_resource +++ b/libcxx/include/memory_resource @@ -66,6 +66,10 @@ namespace std::pmr { # pragma GCC system_header #endif +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER >= 17 && _LIBCPP_STD_VER <= 20 +# include +#endif + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include #endif diff --git a/libcxx/include/syncstream b/libcxx/include/syncstream index 970706976e1ff..2699a4b3a6fbb 100644 --- a/libcxx/include/syncstream +++ b/libcxx/include/syncstream @@ -121,6 +121,7 @@ namespace std { #if _LIBCPP_HAS_LOCALIZATION +# include <__mutex/lock_guard.h> # include <__utility/move.h> # include # include // required for declaration of default arguments @@ -129,7 +130,6 @@ namespace std { # if _LIBCPP_HAS_THREADS # include -# include # include # endif diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index a008b4d76edde..096c321672474 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -465,7 +465,6 @@ future iosfwd future istream future limits future locale -future mutex future new future ratio future sstream @@ -692,11 +691,9 @@ memory_resource compare memory_resource cstdint memory_resource ctime memory_resource limits -memory_resource mutex memory_resource new memory_resource ratio memory_resource tuple -memory_resource typeinfo memory_resource version mutex cerrno mutex climits @@ -1076,7 +1073,6 
@@ syncstream iosfwd syncstream limits syncstream locale syncstream map -syncstream mutex syncstream new syncstream optional syncstream ostream diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index d5321da32b3d4..74d912e5fe3a3 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -464,7 +464,6 @@ future iosfwd future istream future limits future locale -future mutex future new future ratio future sstream @@ -691,11 +690,9 @@ memory_resource compare memory_resource cstdint memory_resource ctime memory_resource limits -memory_resource mutex memory_resource new memory_resource ratio memory_resource tuple -memory_resource typeinfo memory_resource version mutex cerrno mutex climits @@ -1075,7 +1072,6 @@ syncstream iosfwd syncstream limits syncstream locale syncstream map -syncstream mutex syncstream new syncstream optional syncstream ostream diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index 36abefe41e2cd..b8e982b653d39 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -17,13 +17,6 @@ # goal being to reduce the load on testers when a commit is known to fail. # -env: - # LLVM POST-BRANCH bump version - # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" - # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_HEAD_VERSION: "18" # Used compiler, update POST-BRANCH. 
- GCC_STABLE_VERSION: "13" - definitions: _common: &common timeout_in_minutes: 120 diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 3d416e6985d02..57cb443798cd8 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -114,6 +114,7 @@ struct Configuration { bool is64() const { return llvm::COFF::is64Bit(machine); } llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN; + bool machineInferred = false; size_t wordsize; bool verbose = false; WindowsSubsystem subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index df3c5a176b52e..0c6df701284b7 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -46,6 +46,8 @@ static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) { return COFF::isArm64EC(mt) || mt == AMD64; case ARM64X: return COFF::isAnyArm64(mt) || mt == AMD64; + case IMAGE_FILE_MACHINE_UNKNOWN: + return true; default: return ctx.config.machine == mt; } @@ -74,14 +76,26 @@ void SymbolTable::addFile(InputFile *file) { } MachineTypes mt = file->getMachineType(); - if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) { - ctx.config.machine = mt; - ctx.driver.addWinSysRootLibSearchPaths(); - } else if (!compatibleMachineType(ctx, mt)) { + // The ARM64EC target must be explicitly specified and cannot be inferred. 
+ if (mt == ARM64EC && + (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN || + (ctx.config.machineInferred && + (ctx.config.machine == ARM64 || ctx.config.machine == AMD64)))) { + error(toString(file) + ": machine type arm64ec is ambiguous and cannot be " + "inferred, use /machine:arm64ec or /machine:arm64x"); + return; + } + if (!compatibleMachineType(ctx, mt)) { error(toString(file) + ": machine type " + machineToStr(mt) + " conflicts with " + machineToStr(ctx.config.machine)); return; } + if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN && + mt != IMAGE_FILE_MACHINE_UNKNOWN) { + ctx.config.machineInferred = true; + ctx.config.machine = mt; + ctx.driver.addWinSysRootLibSearchPaths(); + } ctx.driver.parseDirectives(file); } diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp index ad6867744c145..6b60ebb18e821 100644 --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -337,8 +337,9 @@ void ErrorHandler::fatal(const Twine &msg) { } SyncStream::~SyncStream() { - os.flush(); switch (level) { + case DiagLevel::None: + break; case DiagLevel::Log: e.log(buf); break; diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 7c65b8ae8c665..b5641e5d9ce55 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -393,8 +393,8 @@ class elf::Patch843419Section final : public SyntheticSection { }; Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, - ".text.patch"), + : SyntheticSection(ctx, ".text.patch", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, 4), patchee(p), patcheeOffset(off) { this->parent = p->getParent(); patchSym = addSyntheticLocal( diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 4257e491121f2..a7120c43e51d3 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -136,8 +136,8 @@ static bool is32bitBranch(uint32_t instr) { 
Patch657417Section::Patch657417Section(Ctx &ctx, InputSection *p, uint64_t off, uint32_t instr, bool isARM) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, - ".text.patch"), + : SyntheticSection(ctx, ".text.patch", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, 4), patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) { parent = p->getParent(); patchSym = addSyntheticLocal( diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index ab948e65c25ed..52fc779855a36 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -73,7 +73,7 @@ uint32_t AMDGPU::calcEFlagsV4() const { // features in the same category are either ANY, ANY and ON, or ANY and OFF). for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) { if (retMach != (getEFlags(f) & EF_AMDGPU_MACH)) { - ErrAlways(ctx) << "incompatible mach: " << f; + Err(ctx) << "incompatible mach: " << f; return 0; } @@ -82,7 +82,7 @@ uint32_t AMDGPU::calcEFlagsV4() const { (getEFlags(f) & EF_AMDGPU_FEATURE_XNACK_V4) != EF_AMDGPU_FEATURE_XNACK_ANY_V4)) { if (retXnack != (getEFlags(f) & EF_AMDGPU_FEATURE_XNACK_V4)) { - ErrAlways(ctx) << "incompatible xnack: " << f; + Err(ctx) << "incompatible xnack: " << f; return 0; } } else { @@ -95,7 +95,7 @@ uint32_t AMDGPU::calcEFlagsV4() const { (getEFlags(f) & EF_AMDGPU_FEATURE_SRAMECC_V4) != EF_AMDGPU_FEATURE_SRAMECC_ANY_V4)) { if (retSramEcc != (getEFlags(f) & EF_AMDGPU_FEATURE_SRAMECC_V4)) { - ErrAlways(ctx) << "incompatible sramecc: " << f; + Err(ctx) << "incompatible sramecc: " << f; return 0; } } else { @@ -143,7 +143,7 @@ uint32_t AMDGPU::calcEFlags() const { case ELFABIVERSION_AMDGPU_HSA_V6: return calcEFlagsV6(); default: - ErrAlways(ctx) << "unknown abi version: " << abiVersion; + Err(ctx) << "unknown abi version: " << abiVersion; return 0; } } diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 69ec0d34ae119..62685b1e7dede 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -557,8 +557,8 @@ void 
ARM::encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, rot = (lz + 8) << 7; } if (check && imm > 0xff) - Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate " - << Twine(val).str() << " for relocation " << rel.type; + Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate " << val + << " for relocation " << rel.type; write32(ctx, loc, (read32(ctx, loc) & 0xff3ff000) | opcode | rot | (imm & 0xff)); } @@ -1219,29 +1219,27 @@ template void ObjFile::importCmseSymbols() { sym->stOther = eSym.st_other; if (eSym.st_shndx != SHN_ABS) { - ErrAlways(ctx) << "CMSE symbol '" << sym->getName() - << "' in import library '" << this << "' is not absolute"; + Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '" + << this << "' is not absolute"; continue; } if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) { - ErrAlways(ctx) << "CMSE symbol '" << sym->getName() - << "' in import library '" << this - << "' is not a Thumb function definition"; + Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '" + << this << "' is not a Thumb function definition"; continue; } if (ctx.symtab->cmseImportLib.count(sym->getName())) { - ErrAlways(ctx) << "CMSE symbol '" << sym->getName() - << "' is multiply defined in import library '" << this - << "'"; + Err(ctx) << "CMSE symbol '" << sym->getName() + << "' is multiply defined in import library '" << this << "'"; continue; } if (eSym.st_size != ACLESESYM_SIZE) { Warn(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '" - << this << "' does not have correct size of " - << Twine(ACLESESYM_SIZE) << " bytes"; + << this << "' does not have correct size of " << ACLESESYM_SIZE + << " bytes"; } ctx.symtab->cmseImportLib[sym->getName()] = sym; @@ -1289,8 +1287,7 @@ void elf::processArmCmseSymbols(Ctx &ctx) { // If input object build attributes do not support CMSE, error and disable // further scanning for , __acle_se_ pairs. 
if (!ctx.arg.armCMSESupport) { - ErrAlways(ctx) - << "CMSE is only supported by ARMv8-M architecture or later"; + Err(ctx) << "CMSE is only supported by ARMv8-M architecture or later"; ctx.arg.cmseImplib = false; break; } @@ -1300,17 +1297,16 @@ void elf::processArmCmseSymbols(Ctx &ctx) { StringRef name = acleSeSym->getName().substr(std::strlen(ACLESESYM_PREFIX)); Symbol *sym = ctx.symtab->find(name); if (!sym) { - ErrAlways(ctx) - << acleSeSym->file << ": cmse special symbol '" - << acleSeSym->getName() - << "' detected, but no associated entry function definition '" << name - << "' with external linkage found"; + Err(ctx) << acleSeSym->file << ": cmse special symbol '" + << acleSeSym->getName() + << "' detected, but no associated entry function definition '" + << name << "' with external linkage found"; continue; } std::string errMsg = checkCmseSymAttributes(ctx, acleSeSym, sym); if (!errMsg.empty()) { - ErrAlways(ctx) << errMsg; + Err(ctx) << errMsg; continue; } @@ -1331,9 +1327,9 @@ void elf::processArmCmseSymbols(Ctx &ctx) { } ArmCmseSGSection::ArmCmseSGSection(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR, - llvm::ELF::SHT_PROGBITS, - /*alignment=*/32, ".gnu.sgstubs") { + : SyntheticSection(ctx, ".gnu.sgstubs", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, + /*addralign=*/32) { entsize = ACLESESYM_SIZE; // The range of addresses used in the CMSE import library should be fixed. 
for (auto &[_, sym] : ctx.symtab->cmseImportLib) { @@ -1445,21 +1441,22 @@ void ArmCmseSGSection::finalizeContents() { // See Arm® v8-M Security Extensions: Requirements on Development Tools // https://developer.arm.com/documentation/ecm0359818/latest template void elf::writeARMCmseImportLib(Ctx &ctx) { - StringTableSection *shstrtab = - make(ctx, ".shstrtab", /*dynamic=*/false); - StringTableSection *strtab = - make(ctx, ".strtab", /*dynamic=*/false); - SymbolTableBaseSection *impSymTab = - make>(ctx, *strtab); + auto shstrtab = + std::make_unique(ctx, ".shstrtab", /*dynamic=*/false); + auto strtab = + std::make_unique(ctx, ".strtab", /*dynamic=*/false); + auto impSymTab = std::make_unique>(ctx, *strtab); SmallVector, SyntheticSection *>, 0> osIsPairs; osIsPairs.emplace_back( - std::make_unique(ctx, strtab->name, 0, 0), strtab); + std::make_unique(ctx, strtab->name, 0, 0), strtab.get()); osIsPairs.emplace_back( - std::make_unique(ctx, impSymTab->name, 0, 0), impSymTab); + std::make_unique(ctx, impSymTab->name, 0, 0), + impSymTab.get()); osIsPairs.emplace_back( - std::make_unique(ctx, shstrtab->name, 0, 0), shstrtab); + std::make_unique(ctx, shstrtab->name, 0, 0), + shstrtab.get()); llvm::sort(ctx.symtab->cmseSymMap, [&](const auto &a, const auto &b) { return a.second.sym->getVA(ctx) < b.second.sym->getVA(ctx); @@ -1495,8 +1492,8 @@ template void elf::writeARMCmseImportLib(Ctx &ctx) { Expected> bufferOrErr = FileOutputBuffer::create(ctx.arg.cmseOutputLib, fileSize, flags); if (!bufferOrErr) { - ErrAlways(ctx) << "failed to open " << ctx.arg.cmseOutputLib << ": " - << bufferOrErr.takeError(); + Err(ctx) << "failed to open " << ctx.arg.cmseOutputLib << ": " + << bufferOrErr.takeError(); return; } diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0eb56de9c7f32..ebfdbafc9983e 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -775,8 +775,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { if 
(LLVM_UNLIKELY(static_cast(remove) < 0)) { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "insufficient padding bytes for " << r.type << ": " - << Twine(allBytes) << " bytes available for " - << "requested alignment of " << Twine(align) << " bytes"; + << allBytes << " bytes available for " + << "requested alignment of " << align << " bytes"; remove = 0; } break; @@ -807,7 +807,7 @@ static bool relax(Ctx &ctx, InputSection &sec) { } // Inform assignAddresses that the size has changed. if (!isUInt<32>(delta)) - Fatal(ctx) << "section size decrease is too large: " << Twine(delta); + Fatal(ctx) << "section size decrease is too large: " << delta; sec.bytesDropped = delta; return changed; } @@ -838,7 +838,7 @@ bool LoongArch::relaxOnce(int pass) const { } void LoongArch::finalizeRelax(int passes) const { - Log(ctx) << "relaxation passes: " << Twine(passes); + Log(ctx) << "relaxation passes: " << passes; SmallVector storage; for (OutputSection *osec : ctx.outputSections) { if (!(osec->flags & SHF_EXECINSTR)) diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index 02f360d73ea15..da76820de240d 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -503,7 +503,7 @@ calculateMipsRelChain(Ctx &ctx, uint8_t *loc, uint32_t type, uint64_t val) { if (type2 == R_MIPS_SUB && (type3 == R_MIPS_HI16 || type3 == R_MIPS_LO16)) return std::make_pair(type3, -val); Err(ctx) << getErrorLoc(ctx, loc) << "unsupported relocations combination " - << Twine(type); + << type; return std::make_pair(type & 0xff, val); } diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp index 0c64a46fe85d0..197cb30cdb8a5 100644 --- a/lld/ELF/Arch/MipsArchTree.cpp +++ b/lld/ELF/Arch/MipsArchTree.cpp @@ -72,24 +72,23 @@ static void checkFlags(Ctx &ctx, ArrayRef files) { for (const FileFlags &f : files) { if (ctx.arg.is64 && f.flags & EF_MIPS_MICROMIPS) - ErrAlways(ctx) << f.file << ": microMIPS 64-bit is not supported"; + Err(ctx) << f.file << ": microMIPS 64-bit is 
not supported"; uint32_t abi2 = f.flags & (EF_MIPS_ABI | EF_MIPS_ABI2); if (abi != abi2) - ErrAlways(ctx) << f.file << ": ABI '" << getAbiName(abi2) - << "' is incompatible with target ABI '" << getAbiName(abi) - << "'"; + Err(ctx) << f.file << ": ABI '" << getAbiName(abi2) + << "' is incompatible with target ABI '" << getAbiName(abi) + << "'"; bool nan2 = f.flags & EF_MIPS_NAN2008; if (nan != nan2) - ErrAlways(ctx) << f.file << ": -mnan=" << getNanName(nan2) - << " is incompatible with target -mnan=" - << getNanName(nan); + Err(ctx) << f.file << ": -mnan=" << getNanName(nan2) + << " is incompatible with target -mnan=" << getNanName(nan); bool fp2 = f.flags & EF_MIPS_FP64; if (fp != fp2) - ErrAlways(ctx) << f.file << ": -mfp" << getFpName(fp2) - << " is incompatible with target -mfp" << getFpName(fp); + Err(ctx) << f.file << ": -mfp" << getFpName(fp2) + << " is incompatible with target -mfp" << getFpName(fp); } } @@ -284,9 +283,9 @@ static uint32_t getArchFlags(Ctx &ctx, ArrayRef files) { if (isArchMatched(newFlags, ret)) continue; if (!isArchMatched(ret, newFlags)) { - ErrAlways(ctx) << "incompatible target ISA:\n>>> " << files[0].file - << ": " << getFullArchName(ret) << "\n>>> " << f.file - << ": " << getFullArchName(newFlags); + Err(ctx) << "incompatible target ISA:\n>>> " << files[0].file << ": " + << getFullArchName(ret) << "\n>>> " << f.file << ": " + << getFullArchName(newFlags); return 0; } ret = newFlags; @@ -350,15 +349,14 @@ static StringRef getMipsFpAbiName(uint8_t fpAbi) { } } -uint8_t elf::getMipsFpAbiFlag(Ctx &ctx, uint8_t oldFlag, uint8_t newFlag, - StringRef fileName) { +uint8_t elf::getMipsFpAbiFlag(Ctx &ctx, InputFile *file, uint8_t oldFlag, + uint8_t newFlag) { if (compareMipsFpAbi(newFlag, oldFlag) >= 0) return newFlag; if (compareMipsFpAbi(oldFlag, newFlag) < 0) - ErrAlways(ctx) << fileName << ": floating point ABI '" - << getMipsFpAbiName(newFlag) - << "' is incompatible with target floating point ABI '" - << getMipsFpAbiName(oldFlag) << 
"'"; + Err(ctx) << file << ": floating point ABI '" << getMipsFpAbiName(newFlag) + << "' is incompatible with target floating point ABI '" + << getMipsFpAbiName(oldFlag) << "'"; return oldFlag; } diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 4dc9f93f5c688..8dd1735ee1e88 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -288,10 +288,10 @@ static void writeSequence(Ctx &ctx, const char *prefix, int from, // The full section content has the extent of [begin, end). We drop unused // instructions and write [first,end). auto *sec = make( - ctx.internalFile, SHF_ALLOC, SHT_PROGBITS, 4, + ctx.internalFile, ".text", SHT_PROGBITS, SHF_ALLOC, /*addralign=*/4, + /*entsize=*/0, ArrayRef(reinterpret_cast(buf.data() + first), - 4 * (buf.size() - first)), - ".text"); + 4 * (buf.size() - first))); ctx.inputSections.push_back(sec); for (Defined *sym : defined) { sym->section = sec; diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 5368ced9a4f53..58a71fd9545c5 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -156,14 +156,13 @@ uint32_t RISCV::calcEFlags() const { target |= EF_RISCV_RVC; if ((eflags & EF_RISCV_FLOAT_ABI) != (target & EF_RISCV_FLOAT_ABI)) - ErrAlways(ctx) << f - << ": cannot link object files with different " - "floating-point ABI from " - << ctx.objectFiles[0]; + Err(ctx) << f + << ": cannot link object files with different " + "floating-point ABI from " + << ctx.objectFiles[0]; if ((eflags & EF_RISCV_RVE) != (target & EF_RISCV_RVE)) - ErrAlways(ctx) - << f << ": cannot link object files with different EF_RISCV_RVE"; + Err(ctx) << f << ": cannot link object files with different EF_RISCV_RVE"; } return target; @@ -659,9 +658,9 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { auto val = rel.sym->getVA(ctx, rel.addend) - rel1.sym->getVA(ctx, rel1.addend); if (overwriteULEB128(loc, val) >= 0x80) - Err(ctx) << sec.getLocation(rel.offset) << ": ULEB128 value " - << Twine(val) 
<< " exceeds available space; references '" - << rel.sym << "'"; + Err(ctx) << sec.getLocation(rel.offset) << ": ULEB128 value " << val + << " exceeds available space; references '" << rel.sym + << "'"; ++i; continue; } @@ -833,10 +832,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (LLVM_UNLIKELY(static_cast(remove) < 0)) { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "insufficient padding bytes for " << r.type << ": " - << Twine(r.addend) + << r.addend << " bytes available " "for requested alignment of " - << Twine(align) << " bytes"; + << align << " bytes"; remove = 0; } break; @@ -900,7 +899,7 @@ static bool relax(Ctx &ctx, InputSection &sec) { } // Inform assignAddresses that the size has changed. if (!isUInt<32>(delta)) - Fatal(ctx) << "section size decrease is too large: " << Twine(delta); + Fatal(ctx) << "section size decrease is too large: " << delta; sec.bytesDropped = delta; return changed; } @@ -933,7 +932,7 @@ bool RISCV::relaxOnce(int pass) const { void RISCV::finalizeRelax(int passes) const { llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); - Log(ctx) << "relaxation passes: " << Twine(passes); + Log(ctx) << "relaxation passes: " << passes; SmallVector storage; for (OutputSection *osec : ctx.outputSections) { if (!(osec->flags & SHF_EXECINSTR)) @@ -1045,7 +1044,7 @@ namespace { class RISCVAttributesSection final : public SyntheticSection { public: RISCVAttributesSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_RISCV_ATTRIBUTES, 1, ".riscv.attributes") { + : SyntheticSection(ctx, ".riscv.attributes", SHT_RISCV_ATTRIBUTES, 0, 1) { } size_t getSize() const override { return size; } @@ -1096,10 +1095,9 @@ static void mergeAtomic(Ctx &ctx, DenseMap::iterator it, auto reportAbiError = [&]() { Err(ctx) << "atomic abi mismatch for " << oldSection->name << "\n>>> " - << oldSection - << ": atomic_abi=" << Twine(static_cast(oldTag)) + << oldSection << ": atomic_abi=" << static_cast(oldTag) << "\n>>> " << newSection - << ": 
atomic_abi=" << Twine(static_cast(newTag)); + << ": atomic_abi=" << static_cast(newTag); }; auto reportUnknownAbiError = [&](const InputSectionBase *section, @@ -1112,7 +1110,7 @@ static void mergeAtomic(Ctx &ctx, DenseMap::iterator it, return; }; Err(ctx) << "unknown atomic abi for " << section->name << "\n>>> " - << section << ": atomic_abi=" << Twine(static_cast(tag)); + << section << ": atomic_abi=" << static_cast(tag); }; switch (oldTag) { case RISCVAtomicAbiTag::UNKNOWN: diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 10c52d7206b80..bc4b967ccbbbb 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1327,15 +1327,13 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { OPT_no_lto_validate_all_vtables_have_type_infos, false); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) - ErrAlways(ctx) << "invalid optimization level for LTO: " - << Twine(ctx.arg.ltoo); + ErrAlways(ctx) << "invalid optimization level for LTO: " << ctx.arg.ltoo; unsigned ltoCgo = args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(ctx.arg.ltoo)); if (auto level = CodeGenOpt::getLevel(ltoCgo)) ctx.arg.ltoCgo = *level; else - ErrAlways(ctx) << "invalid codegen optimization level for LTO: " - << Twine(ltoCgo); + ErrAlways(ctx) << "invalid codegen optimization level for LTO: " << ltoCgo; ctx.arg.ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); ctx.arg.ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); ctx.arg.ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); @@ -2375,8 +2373,9 @@ static void markAddrsig(bool icfSafe, Symbol *s) { // We don't need to keep text sections unique under --icf=all even if they // are address-significant. 
if (auto *d = dyn_cast_or_null(s)) - if (d->section && (icfSafe || !(d->section->flags & SHF_EXECINSTR))) - d->section->keepUnique = true; + if (auto *sec = dyn_cast_or_null(d->section)) + if (icfSafe || !(sec->flags & SHF_EXECINSTR)) + sec->keepUnique = true; } // Record sections that define symbols mentioned in --keep-unique @@ -2391,7 +2390,8 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { Warn(ctx) << "could not find symbol " << name << " to keep unique"; continue; } - d->section->keepUnique = true; + if (auto *sec = dyn_cast(d->section)) + sec->keepUnique = true; } // --icf=all --ignore-data-address-equality means that we can ignore @@ -2700,21 +2700,6 @@ static void redirectSymbols(Ctx &ctx, ArrayRef wrapped) { ctx.symtab->wrap(w.sym, w.real, w.wrap); } -static void reportMissingFeature(Ctx &ctx, StringRef config, - const Twine &report) { - if (config == "error") - ErrAlways(ctx) << report; - else if (config == "warning") - Warn(ctx) << report; -} - -static void checkAndReportMissingFeature(Ctx &ctx, StringRef config, - uint32_t features, uint32_t mask, - const Twine &report) { - if (!(features & mask)) - reportMissingFeature(ctx, config, report); -} - // To enable CET (x86's hardware-assisted control flow enforcement), each // source file must be compiled with -fcf-protection. 
Object files compiled // with the flag contain feature flags indicating that they are compatible @@ -2747,28 +2732,43 @@ static void readSecurityNotes(Ctx &ctx) { bool hasValidPauthAbiCoreInfo = llvm::any_of( ctx.aarch64PauthAbiCoreInfo, [](uint8_t c) { return c != 0; }); + auto report = [&](StringRef config) -> ELFSyncStream { + if (config == "error") + return {ctx, DiagLevel::Err}; + else if (config == "warning") + return {ctx, DiagLevel::Warn}; + return {ctx, DiagLevel::None}; + }; + auto reportUnless = [&](StringRef config, bool cond) -> ELFSyncStream { + if (cond) + return {ctx, DiagLevel::None}; + return report(config); + }; for (ELFFileBase *f : ctx.objectFiles) { uint32_t features = f->andFeatures; - checkAndReportMissingFeature( - ctx, ctx.arg.zBtiReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_BTI, - toStr(ctx, f) + ": -z bti-report: file does not have " - "GNU_PROPERTY_AARCH64_FEATURE_1_BTI property"); - - checkAndReportMissingFeature( - ctx, ctx.arg.zGcsReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_GCS, - toStr(ctx, f) + ": -z gcs-report: file does not have " - "GNU_PROPERTY_AARCH64_FEATURE_1_GCS property"); - - checkAndReportMissingFeature( - ctx, ctx.arg.zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_IBT, - toStr(ctx, f) + ": -z cet-report: file does not have " - "GNU_PROPERTY_X86_FEATURE_1_IBT property"); - - checkAndReportMissingFeature( - ctx, ctx.arg.zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_SHSTK, - toStr(ctx, f) + ": -z cet-report: file does not have " - "GNU_PROPERTY_X86_FEATURE_1_SHSTK property"); + reportUnless(ctx.arg.zBtiReport, + features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) + << f + << ": -z bti-report: file does not have " + "GNU_PROPERTY_AARCH64_FEATURE_1_BTI property"; + + reportUnless(ctx.arg.zGcsReport, + features & GNU_PROPERTY_AARCH64_FEATURE_1_GCS) + << f + << ": -z gcs-report: file does not have " + "GNU_PROPERTY_AARCH64_FEATURE_1_GCS property"; + + reportUnless(ctx.arg.zCetReport, features & 
GNU_PROPERTY_X86_FEATURE_1_IBT) + << f + << ": -z cet-report: file does not have " + "GNU_PROPERTY_X86_FEATURE_1_IBT property"; + + reportUnless(ctx.arg.zCetReport, + features & GNU_PROPERTY_X86_FEATURE_1_SHSTK) + << f + << ": -z cet-report: file does not have " + "GNU_PROPERTY_X86_FEATURE_1_SHSTK property"; if (ctx.arg.zForceBti && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) { features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI; @@ -2798,11 +2798,11 @@ static void readSecurityNotes(Ctx &ctx) { continue; if (f->aarch64PauthAbiCoreInfo.empty()) { - reportMissingFeature(ctx, ctx.arg.zPauthReport, - toStr(ctx, f) + - ": -z pauth-report: file does not have AArch64 " - "PAuth core info while '" + - referenceFileName + "' has one"); + report(ctx.arg.zPauthReport) + << f + << ": -z pauth-report: file does not have AArch64 " + "PAuth core info while '" + << referenceFileName << "' has one"; continue; } diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 0278c070b2473..4c88723f090d0 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -174,6 +174,7 @@ std::string elf::createResponseFile(const opt::InputArgList &args) { break; case OPT_o: case OPT_Map: + case OPT_dependency_file: case OPT_print_archive_stats: case OPT_why_extract: // If an output path contains directories, "lld @response.txt" will diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 7090ca779b0e7..606953e94bbad 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -542,7 +542,7 @@ template void ICF::run() { }); } while (repeat); - Log(ctx) << "ICF needed " << Twine(cnt) << " iterations"; + Log(ctx) << "ICF needed " << cnt << " iterations"; // Merge sections by the equivalence class. 
forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) { diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 3a0ae43b813f4..83a25e1b66cff 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -133,8 +133,7 @@ static void updateARMVFPArgs(Ctx &ctx, const ARMAttributeParser &attributes, // Object compatible with all conventions. return; default: - ErrAlways(ctx) << f - << ": unknown Tag_ABI_VFP_args value: " << Twine(vfpArgs); + ErrAlways(ctx) << f << ": unknown Tag_ABI_VFP_args value: " << vfpArgs; return; } // Follow ld.bfd and error if there is a mix of calling conventions. @@ -284,7 +283,7 @@ static bool isCompatible(Ctx &ctx, InputFile *file) { StringRef target = !ctx.arg.bfdname.empty() ? ctx.arg.bfdname : ctx.arg.emulation; if (!target.empty()) { - ErrAlways(ctx) << file << " is incompatible with " << target; + Err(ctx) << file << " is incompatible with " << target; return false; } @@ -295,10 +294,10 @@ static bool isCompatible(Ctx &ctx, InputFile *file) { existing = ctx.sharedFiles[0]; else if (!ctx.bitcodeFiles.empty()) existing = ctx.bitcodeFiles[0]; - std::string with; + auto diag = Err(ctx); + diag << file << " is incompatible"; if (existing) - with = " with " + toStr(ctx, existing); - ErrAlways(ctx) << file << " is incompatible" << with; + diag << " with " << existing; return false; } @@ -691,8 +690,7 @@ template void ObjFile::parse(bool ignoreComdats) { // Otherwise, discard group members. 
for (uint32_t secIndex : entries.slice(1)) { if (secIndex >= size) - Fatal(ctx) << this - << ": invalid section index in group: " << Twine(secIndex); + Fatal(ctx) << this << ": invalid section index in group: " << secIndex; this->sections[secIndex] = &InputSection::discarded; } } @@ -748,8 +746,8 @@ bool ObjFile::shouldMerge(const Elf_Shdr &sec, StringRef name) { return false; if (sec.sh_size % entSize) Fatal(ctx) << this << ":(" << name << "): SHF_MERGE section size (" - << Twine(sec.sh_size) << ") must be a multiple of sh_entsize (" - << Twine(entSize) << ")"; + << uint64_t(sec.sh_size) + << ") must be a multiple of sh_entsize (" << entSize << ")"; if (sec.sh_flags & SHF_WRITE) Fatal(ctx) << this << ":(" << name @@ -810,7 +808,7 @@ void ObjFile::initializeSections(bool ignoreComdats, Warn(ctx) << this << ": --icf=safe conservatively ignores " "SHT_LLVM_ADDRSIG [index " - << Twine(i) + << i << "] with sh_link=0 " "(likely created using objcopy or ld -r)"; } @@ -903,9 +901,9 @@ void ObjFile::initializeSections(bool ignoreComdats, // simply handle such sections as non-mergeable ones. Degrading like this // is acceptable because section merging is optional. if (auto *ms = dyn_cast(s)) { - s = makeThreadLocal( - ms->file, ms->flags, ms->type, ms->addralign, - ms->contentMaybeDecompress(), ms->name); + s = makeThreadLocal(ms->file, ms->name, ms->type, + ms->flags, ms->addralign, ms->entsize, + ms->contentMaybeDecompress()); sections[info] = s; } @@ -939,7 +937,8 @@ void ObjFile::initializeSections(bool ignoreComdats, if (sec.sh_link < size) linkSec = this->sections[sec.sh_link]; if (!linkSec) - Fatal(ctx) << this << ": invalid sh_link index: " << Twine(sec.sh_link); + Fatal(ctx) << this + << ": invalid sh_link index: " << uint32_t(sec.sh_link); // A SHF_LINK_ORDER section is discarded if its linked-to section is // discarded. 
@@ -1167,7 +1166,7 @@ void ObjFile::initializeSymbols(const object::ELFFile &obj) { if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { if (value == 0 || value >= UINT32_MAX) Fatal(ctx) << this << ": common symbol '" << sym->getName() - << "' has invalid alignment: " << Twine(value); + << "' has invalid alignment: " << value; hasCommonSyms = true; sym->resolve(ctx, CommonSymbol{ctx, this, StringRef(), binding, stOther, type, value, size}); @@ -1214,7 +1213,7 @@ void ObjFile::initSectionsAndLocalSyms(bool ignoreComdats) { else if (secIdx >= SHN_LORESERVE) secIdx = 0; if (LLVM_UNLIKELY(secIdx >= sections.size())) - Fatal(ctx) << this << ": invalid section index: " << Twine(secIdx); + Fatal(ctx) << this << ": invalid section index: " << secIdx; if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) ErrAlways(ctx) << this << ": non-local symbol (" << i << ") found at index < .symtab's sh_info (" << end << ")"; @@ -1274,7 +1273,7 @@ template void ObjFile::postParse() { else if (secIdx >= SHN_LORESERVE) secIdx = 0; if (LLVM_UNLIKELY(secIdx >= sections.size())) - Fatal(ctx) << this << ": invalid section index: " << Twine(secIdx); + Fatal(ctx) << this << ": invalid section index: " << secIdx; InputSectionBase *sec = sections[secIdx]; if (sec == &InputSection::discarded) { if (sym.traced) { @@ -1577,8 +1576,8 @@ template void SharedFile::parse() { // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { if (idx >= verneeds.size()) { - ErrAlways(ctx) << "corrupt input file: version need index " - << Twine(idx) << " for symbol " << name + ErrAlways(ctx) << "corrupt input file: version need index " << idx + << " for symbol " << name << " is out of bounds\n>>> defined in " << this; continue; } @@ -1602,8 +1601,8 @@ template void SharedFile::parse() { // VER_NDX_LOCAL. Workaround this bug. 
if (ctx.arg.emachine == EM_MIPS && name == "_gp_disp") continue; - ErrAlways(ctx) << "corrupt input file: version definition index " - << Twine(idx) << " for symbol " << name + ErrAlways(ctx) << "corrupt input file: version definition index " << idx + << " for symbol " << name << " is out of bounds\n>>> defined in " << this; continue; } @@ -1849,8 +1848,9 @@ void BitcodeFile::postParse() { void BinaryFile::parse() { ArrayRef data = arrayRefFromStringRef(mb.getBuffer()); - auto *section = make(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - 8, data, ".data"); + auto *section = + make(this, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + /*addralign=*/8, /*entsize=*/0, data); sections.push_back(section); // For each input file foo that is embedded to a result as a binary diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 1221f56dfe68a..75121285b7b23 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -52,13 +52,14 @@ static ArrayRef getSectionContents(ObjFile &file, return check(file.getObj().getSectionContents(hdr)); } -InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, - uint32_t type, uint64_t entsize, - uint32_t link, uint32_t info, - uint32_t addralign, ArrayRef data, - StringRef name, Kind sectionKind) - : SectionBase(sectionKind, file, name, flags, entsize, addralign, type, - info, link), +InputSectionBase::InputSectionBase(InputFile *file, StringRef name, + uint32_t type, uint64_t flags, uint32_t link, + uint32_t info, uint32_t addralign, + uint32_t entsize, ArrayRef data, + Kind sectionKind) + : SectionBase(sectionKind, file, name, type, flags, link, info, addralign, + entsize), + bss(0), decodedCrel(0), keepUnique(0), nopFiller(0), content_(data.data()), size(data.size()) { // In order to reduce memory allocation, we assume that mergeable // sections are smaller than 4 GiB, which is not an unreasonable @@ -95,10 +96,10 @@ template InputSectionBase::InputSectionBase(ObjFile &file, const typename 
ELFT::Shdr &hdr, StringRef name, Kind sectionKind) - : InputSectionBase(&file, getFlags(file.ctx, hdr.sh_flags), hdr.sh_type, - hdr.sh_entsize, hdr.sh_link, hdr.sh_info, - hdr.sh_addralign, getSectionContents(file, hdr), name, - sectionKind) { + : InputSectionBase(&file, name, hdr.sh_type, + getFlags(file.ctx, hdr.sh_flags), hdr.sh_link, + hdr.sh_info, hdr.sh_addralign, hdr.sh_entsize, + getSectionContents(file, hdr), sectionKind) { // We reject object files having insanely large alignments even though // they are allowed by the spec. I think 4GB is a reasonable limitation. // We might want to relax this in the future. @@ -273,7 +274,7 @@ void InputSectionBase::parseCompressedHeader(Ctx &ctx) { "not built with zstd support"; } else { ErrAlways(ctx) << this << ": unsupported compression type (" - << Twine(hdr->ch_type) << ")"; + << uint32_t(hdr->ch_type) << ")"; return; } @@ -355,18 +356,19 @@ std::string InputSectionBase::getObjMsg(uint64_t off) const { PotentialSpillSection::PotentialSpillSection(const InputSectionBase &source, InputSectionDescription &isd) - : InputSection(source.file, source.flags, source.type, source.addralign, {}, - source.name, SectionBase::Spill), + : InputSection(source.file, source.name, source.type, source.flags, + source.addralign, source.addralign, {}, SectionBase::Spill), isd(&isd) {} -InputSection InputSection::discarded(nullptr, 0, 0, 0, ArrayRef(), ""); +InputSection InputSection::discarded(nullptr, "", 0, 0, 0, 0, + ArrayRef()); -InputSection::InputSection(InputFile *f, uint64_t flags, uint32_t type, - uint32_t addralign, ArrayRef data, - StringRef name, Kind k) - : InputSectionBase(f, flags, type, - /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, addralign, data, - name, k) { +InputSection::InputSection(InputFile *f, StringRef name, uint32_t type, + uint64_t flags, uint32_t addralign, uint32_t entsize, + ArrayRef data, Kind k) + : InputSectionBase(f, name, type, flags, + /*link=*/0, /*info=*/0, addralign, /*entsize=*/entsize, + data, k) { 
assert(f || this == &InputSection::discarded); } @@ -1092,7 +1094,7 @@ void InputSection::relocateNonAlloc(Ctx &ctx, uint8_t *buf, // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC // sections. if (LLVM_LIKELY(expr == R_ABS) || expr == R_DTPREL || expr == R_GOTPLTREL || - expr == R_RISCV_ADD) { + expr == R_RISCV_ADD || expr == R_ARM_SBREL) { target.relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(ctx, addend))); continue; @@ -1437,12 +1439,13 @@ MergeInputSection::MergeInputSection(ObjFile &f, StringRef name) : InputSectionBase(f, header, name, InputSectionBase::Merge) {} -MergeInputSection::MergeInputSection(Ctx &ctx, uint64_t flags, uint32_t type, - uint64_t entsize, ArrayRef data, - StringRef name) - : InputSectionBase(ctx.internalFile, flags, type, entsize, /*link=*/0, +MergeInputSection::MergeInputSection(Ctx &ctx, StringRef name, uint32_t type, + uint64_t flags, uint64_t entsize, + ArrayRef data) + : InputSectionBase(ctx.internalFile, name, type, flags, /*link=*/0, /*info=*/0, - /*alignment=*/entsize, data, name, SectionBase::Merge) {} + /*addralign=*/entsize, entsize, data, + SectionBase::Merge) {} // This function is called after we obtain a complete list of input sections // that need to be linked. This is responsible to split section contents diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 303452fed60d8..268caa547ffed 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -59,25 +59,17 @@ template struct RelsOrRelas { // sections. class SectionBase { public: - enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output, Class }; - - Kind kind() const { return (Kind)sectionKind; } - - LLVM_PREFERRED_TYPE(Kind) - uint8_t sectionKind : 3; - - // The next two bit fields are only used by InputSectionBase, but we - // put them here so the struct packs better. - - LLVM_PREFERRED_TYPE(bool) - uint8_t bss : 1; - - // Set for sections that should not be folded by ICF. 
- LLVM_PREFERRED_TYPE(bool) - uint8_t keepUnique : 1; + enum Kind : uint8_t { + Regular, + Synthetic, + Spill, + EHFrame, + Merge, + Output, + Class, + }; - uint8_t partition = 1; - uint32_t type; + Kind kind() const { return sectionKind; } // The file which contains this section. For InputSectionBase, its dynamic // type is usually ObjFile, but may be an InputFile of InternalKind @@ -93,10 +85,17 @@ class SectionBase { // These corresponds to the fields in Elf_Shdr. uint64_t flags; - uint32_t addralign; - uint32_t entsize; + uint32_t type; uint32_t link; uint32_t info; + uint32_t addralign; + uint32_t entsize; + + Kind sectionKind; + uint8_t partition = 1; + + // The next two bit fields are only used by InputSectionBase, but we + // put them here so the struct packs better. Ctx &getCtx() const; OutputSection *getOutputSection(); @@ -116,11 +115,11 @@ class SectionBase { protected: constexpr SectionBase(Kind sectionKind, InputFile *file, StringRef name, - uint64_t flags, uint32_t entsize, uint32_t addralign, - uint32_t type, uint32_t info, uint32_t link) - : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), - file(file), name(name), flags(flags), addralign(addralign), - entsize(entsize), link(link), info(info) {} + uint32_t type, uint64_t flags, uint32_t link, + uint32_t info, uint32_t addralign, uint32_t entsize) + : file(file), name(name), flags(flags), type(type), link(link), + info(info), addralign(addralign), entsize(entsize), + sectionKind(sectionKind) {} }; struct SymbolAnchor { @@ -148,15 +147,34 @@ class InputSectionBase : public SectionBase { InputSectionBase(ObjFile &file, const typename ELFT::Shdr &header, StringRef name, Kind sectionKind); - InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, - uint64_t entsize, uint32_t link, uint32_t info, - uint32_t addralign, ArrayRef data, StringRef name, + InputSectionBase(InputFile *file, StringRef name, uint32_t type, + uint64_t flags, uint32_t link, uint32_t info, + uint32_t 
addralign, uint32_t entsize, ArrayRef data, Kind sectionKind); static bool classof(const SectionBase *s) { return s->kind() != Output && s->kind() != Class; } + LLVM_PREFERRED_TYPE(bool) + uint8_t bss : 1; + + // Whether this section is SHT_CREL and has been decoded to RELA by + // relsOrRelas. + LLVM_PREFERRED_TYPE(bool) + uint8_t decodedCrel : 1; + + // Set for sections that should not be folded by ICF. + LLVM_PREFERRED_TYPE(bool) + uint8_t keepUnique : 1; + + // Whether the section needs to be padded with a NOP filler due to + // deleteFallThruJmpInsn. + LLVM_PREFERRED_TYPE(bool) + uint8_t nopFiller : 1; + + mutable bool compressed = false; + // Input sections are part of an output section. Special sections // like .eh_frame and merge sections are first combined into a // synthetic section that is then added to an output section. In all @@ -176,16 +194,6 @@ class InputSectionBase : public SectionBase { // be reset to zero after uses. uint32_t bytesDropped = 0; - mutable bool compressed = false; - - // Whether this section is SHT_CREL and has been decoded to RELA by - // relsOrRelas. - bool decodedCrel = false; - - // Whether the section needs to be padded with a NOP filler due to - // deleteFallThruJmpInsn. - bool nopFiller = false; - void drop_back(unsigned num) { assert(bytesDropped + num < 256); bytesDropped += num; @@ -315,8 +323,8 @@ class MergeInputSection : public InputSectionBase { template MergeInputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name); - MergeInputSection(Ctx &, uint64_t flags, uint32_t type, uint64_t entsize, - ArrayRef data, StringRef name); + MergeInputSection(Ctx &, StringRef name, uint32_t type, uint64_t flags, + uint64_t entsize, ArrayRef data); static bool classof(const SectionBase *s) { return s->kind() == Merge; } void splitIntoPieces(); @@ -394,8 +402,9 @@ class EhInputSection : public InputSectionBase { // .eh_frame. It also includes the synthetic sections themselves. 
class InputSection : public InputSectionBase { public: - InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign, - ArrayRef data, StringRef name, Kind k = Regular); + InputSection(InputFile *f, StringRef name, uint32_t type, uint64_t flags, + uint32_t addralign, uint32_t entsize, ArrayRef data, + Kind k = Regular); template InputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name); @@ -466,15 +475,17 @@ class PotentialSpillSection : public InputSection { } }; -static_assert(sizeof(InputSection) <= 160, "InputSection is too big"); +#ifndef _WIN32 +static_assert(sizeof(InputSection) <= 152, "InputSection is too big"); +#endif class SyntheticSection : public InputSection { public: Ctx &ctx; - SyntheticSection(Ctx &ctx, uint64_t flags, uint32_t type, uint32_t addralign, - StringRef name) - : InputSection(ctx.internalFile, flags, type, addralign, {}, name, - InputSectionBase::Synthetic), + SyntheticSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags, + uint32_t addralign) + : InputSection(ctx.internalFile, name, type, flags, addralign, + /*entsize=*/0, {}, InputSectionBase::Synthetic), ctx(ctx) {} virtual ~SyntheticSection() = default; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index d8aa2c46cfa5b..7d24c6750b0d1 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -145,7 +145,9 @@ OutputDesc *LinkerScript::createOutputSection(StringRef name, // There was a forward reference. 
sec = secRef; } else { - sec = make(ctx, name, SHT_PROGBITS, 0); + descPool.emplace_back( + std::make_unique(ctx, name, SHT_PROGBITS, 0)); + sec = descPool.back().get(); if (!secRef) secRef = sec; } @@ -154,10 +156,14 @@ OutputDesc *LinkerScript::createOutputSection(StringRef name, } OutputDesc *LinkerScript::getOrCreateOutputSection(StringRef name) { - OutputDesc *&cmdRef = nameToOutputSection[CachedHashStringRef(name)]; - if (!cmdRef) - cmdRef = make(ctx, name, SHT_PROGBITS, 0); - return cmdRef; + auto &secRef = nameToOutputSection[CachedHashStringRef(name)]; + if (!secRef) { + secRef = descPool + .emplace_back( + std::make_unique(ctx, name, SHT_PROGBITS, 0)) + .get(); + } + return secRef; } // Expands the memory region by the specified size. @@ -1778,7 +1784,7 @@ static void checkMemoryRegion(Ctx &ctx, const MemoryRegion *region, if (osecEnd > regionEnd) { ErrAlways(ctx) << "section '" << osec->name << "' will not fit in region '" << region->name << "': overflowed by " - << Twine(osecEnd - regionEnd) << " bytes"; + << (osecEnd - regionEnd) << " bytes"; } } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index f5408b4ba3037..328368fd3b433 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -299,6 +299,7 @@ class LinkerScript final { }; Ctx &ctx; + SmallVector, 0> descPool; llvm::DenseMap nameToOutputSection; StringRef getOutputSectionName(const InputSectionBase *s) const; diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index f18d799a8c4e4..138d35951a3bb 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -59,7 +59,9 @@ static std::vector getSymbols(Ctx &ctx) { for (Symbol *b : file->getSymbols()) if (auto *dr = dyn_cast(b)) if (!dr->isSection() && dr->section && dr->section->isLive() && - (dr->file == file || dr->hasFlag(NEEDS_COPY) || dr->section->bss)) + (dr->file == file || dr->hasFlag(NEEDS_COPY) || + (isa(dr->section) && + cast(dr->section)->bss))) v.push_back(dr); return v; } diff --git 
a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 9bcbea250e7db..31d14df9be71e 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -67,9 +67,8 @@ void OutputSection::writeHeaderTo(typename ELFT::Shdr *shdr) { OutputSection::OutputSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags) - : SectionBase(Output, ctx.internalFile, name, flags, /*entsize=*/0, - /*addralign=*/1, type, - /*info=*/0, /*link=*/0), + : SectionBase(Output, ctx.internalFile, name, type, flags, /*link=*/0, + /*info=*/0, /*addralign=*/1, /*entsize=*/0), ctx(ctx) {} uint64_t OutputSection::getLMA() const { diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e110adead5ad0..d311dba41741c 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -74,13 +74,12 @@ static std::optional getLinkerScriptLocation(Ctx &ctx, return std::nullopt; } -static std::string getDefinedLocation(Ctx &ctx, const Symbol &sym) { - const char msg[] = "\n>>> defined in "; +static void printDefinedLocation(ELFSyncStream &s, const Symbol &sym) { + s << "\n>>> defined in "; if (sym.file) - return msg + toStr(ctx, sym.file); - if (std::optional loc = getLinkerScriptLocation(ctx, sym)) - return msg + *loc; - return ""; + return void(s << sym.file); + if (std::optional loc = getLinkerScriptLocation(s.ctx, sym)) + return void(s << *loc); } // Construct a message in the following format. 
@@ -88,13 +87,14 @@ static std::string getDefinedLocation(Ctx &ctx, const Symbol &sym) { // >>> defined in /home/alice/src/foo.o // >>> referenced by bar.c:12 (/home/alice/src/bar.c:12) // >>> /home/alice/src/bar.o:(.text+0x1) -static std::string getLocation(Ctx &ctx, InputSectionBase &s, const Symbol &sym, - uint64_t off) { - std::string msg = getDefinedLocation(ctx, sym) + "\n>>> referenced by "; - std::string src = s.getSrcMsg(sym, off); +static void printLocation(ELFSyncStream &s, InputSectionBase &sec, + const Symbol &sym, uint64_t off) { + printDefinedLocation(s, sym); + s << "\n>>> referenced by "; + std::string src = sec.getSrcMsg(sym, off); if (!src.empty()) - msg += src + "\n>>> "; - return msg + s.getObjMsg(off); + s << src << "\n>>> "; + s << sec.getObjMsg(off); } void elf::reportRangeError(Ctx &ctx, uint8_t *loc, const Relocation &rel, @@ -121,7 +121,7 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, const Relocation &rel, if (!errPlace.srcLoc.empty()) diag << "\n>>> referenced by " << errPlace.srcLoc; if (rel.sym && !rel.sym->isSection()) - diag << getDefinedLocation(ctx, *rel.sym); + printDefinedLocation(diag, *rel.sym); if (errPlace.isec && errPlace.isec->name.starts_with(".debug")) diag << "; consider recompiling with -fdebug-types-section to reduce size " @@ -133,8 +133,10 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, int64_t v, int n, auto diag = Err(ctx); diag << getErrorPlace(ctx, loc).loc << msg << " is out of range: " << v << " is not in [" << llvm::minIntN(n) << ", " << llvm::maxIntN(n) << "]"; - if (!sym.getName().empty()) - diag << "; references '" << &sym << '\'' << getDefinedLocation(ctx, sym); + if (!sym.getName().empty()) { + diag << "; references '" << &sym << '\''; + printDefinedLocation(diag, sym); + } } // Build a bitmask with one bit set for each 64 subset of RelExpr. 
@@ -522,42 +524,39 @@ int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr, // Custom error message if Sym is defined in a discarded section. template -static std::string maybeReportDiscarded(Ctx &ctx, Undefined &sym) { +static void maybeReportDiscarded(Ctx &ctx, ELFSyncStream &msg, Undefined &sym) { auto *file = dyn_cast_or_null>(sym.file); if (!file || !sym.discardedSecIdx) - return ""; + return; ArrayRef objSections = file->template getELFShdrs(); - std::string msg; if (sym.type == ELF::STT_SECTION) { - msg = "relocation refers to a discarded section: "; - msg += CHECK2( + msg << "relocation refers to a discarded section: "; + msg << CHECK2( file->getObj().getSectionName(objSections[sym.discardedSecIdx]), file); } else { - msg = "relocation refers to a symbol in a discarded section: " + - toStr(ctx, sym); + msg << "relocation refers to a symbol in a discarded section: " << &sym; } - msg += "\n>>> defined in " + toStr(ctx, file); + msg << "\n>>> defined in " << file; Elf_Shdr_Impl elfSec = objSections[sym.discardedSecIdx - 1]; if (elfSec.sh_type != SHT_GROUP) - return msg; + return; // If the discarded section is a COMDAT. StringRef signature = file->getShtGroupSignature(objSections, elfSec); if (const InputFile *prevailing = ctx.symtab->comdatGroups.lookup(CachedHashStringRef(signature))) { - msg += "\n>>> section group signature: " + signature.str() + - "\n>>> prevailing definition is in " + toStr(ctx, prevailing); + msg << "\n>>> section group signature: " << signature + << "\n>>> prevailing definition is in " << prevailing; if (sym.nonPrevailing) { - msg += "\n>>> or the symbol in the prevailing group had STB_WEAK " + msg << "\n>>> or the symbol in the prevailing group had STB_WEAK " "binding and the symbol in a non-prevailing group had STB_GLOBAL " "binding. 
Mixing groups with STB_WEAK and STB_GLOBAL binding " "signature is not supported"; } } - return msg; } // Check whether the definition name def is a mangled function name that matches @@ -695,8 +694,9 @@ static const Symbol *getAlternativeSpelling(Ctx &ctx, const Undefined &sym, static void reportUndefinedSymbol(Ctx &ctx, const UndefinedDiag &undef, bool correctSpelling) { Undefined &sym = *undef.sym; + ELFSyncStream msg(ctx, DiagLevel::None); - auto visibility = [&]() -> std::string { + auto visibility = [&]() { switch (sym.visibility()) { case STV_INTERNAL: return "internal "; @@ -709,75 +709,70 @@ static void reportUndefinedSymbol(Ctx &ctx, const UndefinedDiag &undef, } }; - std::string msg; switch (ctx.arg.ekind) { case ELF32LEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF32BEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF64LEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF64BEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; default: llvm_unreachable(""); } - if (msg.empty()) - msg = "undefined " + visibility() + "symbol: " + toStr(ctx, sym); + if (msg.str().empty()) + msg << "undefined " << visibility() << "symbol: " << &sym; const size_t maxUndefReferences = 3; - size_t i = 0; - for (UndefinedDiag::Loc l : undef.locs) { - if (i >= maxUndefReferences) - break; + for (UndefinedDiag::Loc l : + ArrayRef(undef.locs).take_front(maxUndefReferences)) { InputSectionBase &sec = *l.sec; uint64_t offset = l.offset; - msg += "\n>>> referenced by "; + msg << "\n>>> referenced by "; // In the absence of line number information, utilize DW_TAG_variable (if // present) for the enclosing symbol (e.g. var in `int *a[] = {&undef};`). Symbol *enclosing = sec.getEnclosingSymbol(offset); std::string src = sec.getSrcMsg(enclosing ? 
*enclosing : sym, offset); if (!src.empty()) - msg += src + "\n>>> "; - msg += sec.getObjMsg(offset); - i++; + msg << src << "\n>>> "; + msg << sec.getObjMsg(offset); } - if (i < undef.locs.size()) - msg += ("\n>>> referenced " + Twine(undef.locs.size() - i) + " more times") - .str(); + if (maxUndefReferences < undef.locs.size()) + msg << "\n>>> referenced " << (undef.locs.size() - maxUndefReferences) + << " more times"; if (correctSpelling) { std::string pre_hint = ": ", post_hint; if (const Symbol *corrected = getAlternativeSpelling(ctx, sym, pre_hint, post_hint)) { - msg += - "\n>>> did you mean" + pre_hint + toStr(ctx, *corrected) + post_hint; + msg << "\n>>> did you mean" << pre_hint << corrected << post_hint; if (corrected->file) - msg += "\n>>> defined in: " + toStr(ctx, corrected->file); + msg << "\n>>> defined in: " << corrected->file; } } if (sym.getName().starts_with("_ZTV")) - msg += - "\n>>> the vtable symbol may be undefined because the class is missing " - "its key function (see https://lld.llvm.org/missingkeyfunction)"; + msg << "\n>>> the vtable symbol may be undefined because the class is " + "missing its key function " + "(see https://lld.llvm.org/missingkeyfunction)"; if (ctx.arg.gcSections && ctx.arg.zStartStopGC && sym.getName().starts_with("__start_")) { - msg += "\n>>> the encapsulation symbol needs to be retained under " + msg << "\n>>> the encapsulation symbol needs to be retained under " "--gc-sections properly; consider -z nostart-stop-gc " "(see https://lld.llvm.org/ELF/start-stop-gc)"; } if (undef.isWarning) - Warn(ctx) << msg; + Warn(ctx) << msg.str(); else - ctx.e.error(msg, ErrorTag::SymbolNotFound, {sym.getName()}); + ctx.e.error(msg.str(), ErrorTag::SymbolNotFound, {sym.getName()}); } void elf::reportUndefinedSymbols(Ctx &ctx) { @@ -1020,9 +1015,9 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (sym.scriptDefined) return true; - Err(ctx) << "relocation " << type - << " cannot refer to absolute 
symbol: " << &sym - << getLocation(ctx, *sec, sym, relOff); + auto diag = Err(ctx); + diag << "relocation " << type << " cannot refer to absolute symbol: " << &sym; + printLocation(diag, *sec, sym, relOff); return true; } @@ -1188,18 +1183,21 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, if (!ctx.arg.shared && sym.isShared() && !(ctx.arg.emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64)) { if (!canDefineSymbolInExecutable(ctx, sym)) { - Err(ctx) << "cannot preempt symbol: " << &sym - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "cannot preempt symbol: " << &sym; + printLocation(diag, *sec, sym, offset); return; } if (sym.isObject()) { // Produce a copy relocation. if (auto *ss = dyn_cast(&sym)) { - if (!ctx.arg.zCopyreloc) - Err(ctx) << "unresolvable relocation " << type << " against symbol '" - << ss << "'; recompile with -fPIC or remove '-z nocopyreloc'" - << getLocation(ctx, *sec, sym, offset); + if (!ctx.arg.zCopyreloc) { + auto diag = Err(ctx); + diag << "unresolvable relocation " << type << " against symbol '" + << ss << "'; recompile with -fPIC or remove '-z nocopyreloc'"; + printLocation(diag, *sec, sym, offset); + } sym.setFlags(NEEDS_COPY); } sec->addReloc({expr, type, offset, addend, &sym}); @@ -1234,20 +1232,26 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // * If a library definition gets preempted to the executable, it will have // the wrong ebx value. 
if (sym.isFunc()) { - if (ctx.arg.pie && ctx.arg.emachine == EM_386) - Err(ctx) << "symbol '" << &sym - << "' cannot be preempted; recompile with -fPIE" - << getLocation(ctx, *sec, sym, offset); + if (ctx.arg.pie && ctx.arg.emachine == EM_386) { + auto diag = Err(ctx); + diag << "symbol '" << &sym + << "' cannot be preempted; recompile with -fPIE"; + printLocation(diag, *sec, sym, offset); + } sym.setFlags(NEEDS_COPY | NEEDS_PLT); sec->addReloc({expr, type, offset, addend, &sym}); return; } } - Err(ctx) << "relocation " << type << " cannot be used against " - << (sym.getName().empty() ? "local symbol" - : ("symbol '" + toStr(ctx, sym) + "'")) - << "; recompile with -fPIC" << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "relocation " << type << " cannot be used against "; + if (sym.getName().empty()) + diag << "local symbol"; + else + diag << "symbol '" << &sym << "'"; + diag << "; recompile with -fPIC"; + printLocation(diag, *sec, sym, offset); } // This function is similar to the `handleTlsRelocation`. MIPS does not @@ -1284,9 +1288,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, int64_t addend) { if (expr == R_TPREL || expr == R_TPREL_NEG) { if (ctx.arg.shared) { - Err(ctx) << "relocation " << type << " against " << &sym - << " cannot be used with -shared" - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "relocation " << type << " against " << &sym + << " cannot be used with -shared"; + printLocation(diag, *sec, sym, offset); return 1; } return 0; @@ -1493,9 +1498,10 @@ void RelocationScanner::scanOne(typename Relocs::const_iterator &i) { // Skip the error check for CREL, which does not set `end`. 
if constexpr (!RelTy::IsCrel) { if (i == end) { - Err(ctx) << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " - "relocation" - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " + "relocation"; + printLocation(diag, *sec, sym, offset); return; } } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 7e5e713513c47..21fe2a25fa1bd 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -89,8 +89,8 @@ static ArrayRef getVersion(Ctx &ctx) { // The returned object is a mergeable string section. MergeInputSection *elf::createCommentSection(Ctx &ctx) { auto *sec = - make(ctx, SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, - getVersion(ctx), ".comment"); + make(ctx, ".comment", SHT_PROGBITS, + SHF_MERGE | SHF_STRINGS, 1, getVersion(ctx)); sec->splitIntoPieces(); return sec; } @@ -99,7 +99,7 @@ MergeInputSection *elf::createCommentSection(Ctx &ctx) { template MipsAbiFlagsSection::MipsAbiFlagsSection(Ctx &ctx, Elf_Mips_ABIFlags flags) - : SyntheticSection(ctx, SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), + : SyntheticSection(ctx, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, 8), flags(flags) { this->entsize = sizeof(Elf_Mips_ABIFlags); } @@ -120,23 +120,20 @@ MipsAbiFlagsSection::create(Ctx &ctx) { sec->markDead(); create = true; - std::string filename = toStr(ctx, sec->file); const size_t size = sec->content().size(); // Older version of BFD (such as the default FreeBSD linker) concatenate // .MIPS.abiflags instead of merging. 
To allow for this case (or potential // zero padding) we ignore everything after the first Elf_Mips_ABIFlags if (size < sizeof(Elf_Mips_ABIFlags)) { - ErrAlways(ctx) << filename - << ": invalid size of .MIPS.abiflags section: got " - << Twine(size) << " instead of " - << Twine(sizeof(Elf_Mips_ABIFlags)); + Err(ctx) << sec->file << ": invalid size of .MIPS.abiflags section: got " + << size << " instead of " << sizeof(Elf_Mips_ABIFlags); return nullptr; } auto *s = reinterpret_cast(sec->content().data()); if (s->version != 0) { - ErrAlways(ctx) << filename << ": unexpected .MIPS.abiflags version " - << Twine(s->version); + Err(ctx) << sec->file << ": unexpected .MIPS.abiflags version " + << s->version; return nullptr; } @@ -152,7 +149,7 @@ MipsAbiFlagsSection::create(Ctx &ctx) { flags.flags1 |= s->flags1; flags.flags2 |= s->flags2; flags.fp_abi = - elf::getMipsFpAbiFlag(ctx, flags.fp_abi, s->fp_abi, filename); + elf::getMipsFpAbiFlag(ctx, sec->file, flags.fp_abi, s->fp_abi); }; if (create) @@ -163,7 +160,7 @@ MipsAbiFlagsSection::create(Ctx &ctx) { // .MIPS.options section. 
template MipsOptionsSection::MipsOptionsSection(Ctx &ctx, Elf_Mips_RegInfo reginfo) - : SyntheticSection(ctx, SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), + : SyntheticSection(ctx, ".MIPS.options", SHT_MIPS_OPTIONS, SHF_ALLOC, 8), reginfo(reginfo) { this->entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); } @@ -197,12 +194,10 @@ MipsOptionsSection::create(Ctx &ctx) { for (InputSectionBase *sec : sections) { sec->markDead(); - std::string filename = toStr(ctx, sec->file); ArrayRef d = sec->content(); - while (!d.empty()) { if (d.size() < sizeof(Elf_Mips_Options)) { - ErrAlways(ctx) << filename << ": invalid size of .MIPS.options section"; + Err(ctx) << sec->file << ": invalid size of .MIPS.options section"; break; } @@ -213,8 +208,10 @@ MipsOptionsSection::create(Ctx &ctx) { break; } - if (!opt->size) - Fatal(ctx) << filename << ": zero option descriptor size"; + if (!opt->size) { + Err(ctx) << sec->file << ": zero option descriptor size"; + break; + } d = d.slice(opt->size); } }; @@ -225,7 +222,7 @@ MipsOptionsSection::create(Ctx &ctx) { // MIPS .reginfo section. 
template MipsReginfoSection::MipsReginfoSection(Ctx &ctx, Elf_Mips_RegInfo reginfo) - : SyntheticSection(ctx, SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), + : SyntheticSection(ctx, ".reginfo", SHT_MIPS_REGINFO, SHF_ALLOC, 4), reginfo(reginfo) { this->entsize = sizeof(Elf_Mips_RegInfo); } @@ -256,7 +253,7 @@ MipsReginfoSection::create(Ctx &ctx) { sec->markDead(); if (sec->content().size() != sizeof(Elf_Mips_RegInfo)) { - ErrAlways(ctx) << sec->file << ": invalid size of .reginfo section"; + Err(ctx) << sec->file << ": invalid size of .reginfo section"; return nullptr; } @@ -273,8 +270,9 @@ InputSection *elf::createInterpSection(Ctx &ctx) { StringRef s = ctx.saver.save(ctx.arg.dynamicLinker); ArrayRef contents = {(const uint8_t *)s.data(), s.size() + 1}; - return make(ctx.internalFile, SHF_ALLOC, SHT_PROGBITS, 1, - contents, ".interp"); + return make(ctx.internalFile, ".interp", SHT_PROGBITS, + SHF_ALLOC, + /*addralign=*/1, /*entsize=*/0, contents); } Defined *elf::addSyntheticLocal(Ctx &ctx, StringRef name, uint8_t type, @@ -323,8 +321,8 @@ static size_t getHashSize(Ctx &ctx) { // sets is empty, or some input files didn't have .note.gnu.property sections), // we don't create this section. 
GnuPropertySection::GnuPropertySection(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, - ctx.arg.wordsize, ".note.gnu.property") {} + : SyntheticSection(ctx, ".note.gnu.property", SHT_NOTE, SHF_ALLOC, + ctx.arg.wordsize) {} void GnuPropertySection::writeTo(uint8_t *buf) { write32(ctx, buf, 4); // Name size @@ -365,7 +363,7 @@ size_t GnuPropertySection::getSize() const { } BuildIdSection::BuildIdSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"), + : SyntheticSection(ctx, ".note.gnu.build-id", SHT_NOTE, SHF_ALLOC, 4), hashSize(getHashSize(ctx)) {} void BuildIdSection::writeTo(uint8_t *buf) { @@ -383,14 +381,14 @@ void BuildIdSection::writeBuildId(ArrayRef buf) { BssSection::BssSection(Ctx &ctx, StringRef name, uint64_t size, uint32_t alignment) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_NOBITS, alignment, - name) { + : SyntheticSection(ctx, name, SHT_NOBITS, SHF_ALLOC | SHF_WRITE, + alignment) { this->bss = true; this->size = size; } EhFrameSection::EhFrameSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {} + : SyntheticSection(ctx, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, 1) {} // Search for an existing CIE record or create a new one. 
// CIE records from input object files are uniquified by their contents @@ -661,8 +659,8 @@ void EhFrameSection::writeTo(uint8_t *buf) { } GotSection::GotSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - ctx.target->gotEntrySize, ".got") { + : SyntheticSection(ctx, ".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + ctx.target->gotEntrySize) { numEntries = ctx.target->gotHeaderEntriesNum; } @@ -745,8 +743,8 @@ static uint64_t getMipsPageCount(uint64_t size) { } MipsGotSection::MipsGotSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, - SHT_PROGBITS, 16, ".got") {} + : SyntheticSection(ctx, ".got", SHT_PROGBITS, + SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, 16) {} void MipsGotSection::addEntry(InputFile &file, Symbol &sym, int64_t addend, RelExpr expr) { @@ -1179,8 +1177,8 @@ void MipsGotSection::writeTo(uint8_t *buf) { // section. I don't know why we have a BSS style type for the section but it is // consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI. GotPltSection::GotPltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - ctx.arg.wordsize, ".got.plt") { + : SyntheticSection(ctx, ".got.plt", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + ctx.arg.wordsize) { if (ctx.arg.emachine == EM_PPC) { name = ".plt"; } else if (ctx.arg.emachine == EM_PPC64) { @@ -1231,9 +1229,9 @@ static StringRef getIgotPltName(Ctx &ctx) { // On PowerPC64 the GotPltSection type is SHT_NOBITS so we have to follow suit // with the IgotPltSection. IgotPltSection::IgotPltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, + : SyntheticSection(ctx, getIgotPltName(ctx), ctx.arg.emachine == EM_PPC64 ? 
SHT_NOBITS : SHT_PROGBITS, - ctx.target->gotEntrySize, getIgotPltName(ctx)) {} + SHF_ALLOC | SHF_WRITE, ctx.target->gotEntrySize) {} void IgotPltSection::addEntry(Symbol &sym) { assert(ctx.symAux.back().pltIdx == entries.size()); @@ -1252,8 +1250,8 @@ void IgotPltSection::writeTo(uint8_t *buf) { } StringTableSection::StringTableSection(Ctx &ctx, StringRef name, bool dynamic) - : SyntheticSection(ctx, dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, - name), + : SyntheticSection(ctx, name, SHT_STRTAB, dynamic ? (uint64_t)SHF_ALLOC : 0, + 1), dynamic(dynamic) { // ELF string tables start with a NUL byte. strings.push_back(""); @@ -1296,8 +1294,8 @@ static unsigned getVerDefNum(Ctx &ctx) { template DynamicSection::DynamicSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, - ctx.arg.wordsize, ".dynamic") { + : SyntheticSection(ctx, ".dynamic", SHT_DYNAMIC, SHF_ALLOC | SHF_WRITE, + ctx.arg.wordsize) { this->entsize = ELFT::Is64Bits ? 16 : 8; // .dynamic section is not writable on MIPS and on Fuchsia OS @@ -1651,7 +1649,7 @@ RelocationBaseSection::RelocationBaseSection(Ctx &ctx, StringRef name, int32_t sizeDynamicTag, bool combreloc, unsigned concurrency) - : SyntheticSection(ctx, SHF_ALLOC, type, ctx.arg.wordsize, name), + : SyntheticSection(ctx, name, type, SHF_ALLOC, ctx.arg.wordsize), dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag), relocsVec(concurrency), combreloc(combreloc) {} @@ -1767,11 +1765,11 @@ template void RelocationSection::writeTo(uint8_t *buf) { RelrBaseSection::RelrBaseSection(Ctx &ctx, unsigned concurrency, bool isAArch64Auth) : SyntheticSection( - ctx, SHF_ALLOC, + ctx, isAArch64Auth ? ".relr.auth.dyn" : ".relr.dyn", isAArch64Auth ? SHT_AARCH64_AUTH_RELR : (ctx.arg.useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR), - ctx.arg.wordsize, isAArch64Auth ? 
".relr.auth.dyn" : ".relr.dyn"), + SHF_ALLOC, ctx.arg.wordsize), relocsVec(concurrency) {} void RelrBaseSection::mergeRels() { @@ -2118,8 +2116,8 @@ template bool RelrSection::updateAllocSize(Ctx &ctx) { // Don't allow the section to shrink; otherwise the size of the section can // oscillate infinitely. Trailing 1s do not decode to more relocations. if (relrRelocs.size() < oldSize) { - Log(ctx) << ".relr.dyn needs " << Twine(oldSize - relrRelocs.size()) << - " padding word(s)"; + Log(ctx) << ".relr.dyn needs " << (oldSize - relrRelocs.size()) + << " padding word(s)"; relrRelocs.resize(oldSize, Elf_Relr(1)); } @@ -2128,10 +2126,10 @@ template bool RelrSection::updateAllocSize(Ctx &ctx) { SymbolTableBaseSection::SymbolTableBaseSection(Ctx &ctx, StringTableSection &strTabSec) - : SyntheticSection(ctx, strTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, + : SyntheticSection(ctx, strTabSec.isDynamic() ? ".dynsym" : ".symtab", strTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, - ctx.arg.wordsize, - strTabSec.isDynamic() ? ".dynsym" : ".symtab"), + strTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, + ctx.arg.wordsize), strTabSec(strTabSec) {} // Orders symbols according to their positions in the GOT, @@ -2348,7 +2346,7 @@ template void SymbolTableSection::writeTo(uint8_t *buf) { } SymtabShndxSection::SymtabShndxSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_SYMTAB_SHNDX, 4, ".symtab_shndx") { + : SyntheticSection(ctx, ".symtab_shndx", SHT_SYMTAB_SHNDX, 0, 4) { this->entsize = 4; } @@ -2419,8 +2417,8 @@ size_t SymtabShndxSection::getSize() const { // about .gnu.hash, you want to specify --hash-style=gnu. Otherwise, a // safe bet is to specify --hash-style=both for backward compatibility. 
GnuHashTableSection::GnuHashTableSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_HASH, ctx.arg.wordsize, - ".gnu.hash") {} + : SyntheticSection(ctx, ".gnu.hash", SHT_GNU_HASH, SHF_ALLOC, + ctx.arg.wordsize) {} void GnuHashTableSection::finalizeContents() { if (OutputSection *sec = getPartition(ctx).dynSymTab->getParent()) @@ -2529,7 +2527,7 @@ void GnuHashTableSection::addSymbols(SmallVectorImpl &v) { } HashTableSection::HashTableSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_HASH, 4, ".hash") { + : SyntheticSection(ctx, ".hash", SHT_HASH, SHF_ALLOC, 4) { this->entsize = 4; } @@ -2569,8 +2567,8 @@ void HashTableSection::writeTo(uint8_t *buf) { } PltSection::PltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".plt"), + : SyntheticSection(ctx, ".plt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + 16), headerSize(ctx.target->pltHeaderSize) { // On PowerPC, this section contains lazy symbol resolvers. if (ctx.arg.emachine == EM_PPC64) { @@ -2630,8 +2628,8 @@ void PltSection::addSymbols() { } IpltSection::IpltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".iplt") { + : SyntheticSection(ctx, ".iplt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + 16) { if (ctx.arg.emachine == EM_PPC || ctx.arg.emachine == EM_PPC64) { name = ".glink"; addralign = 4; @@ -2737,8 +2735,8 @@ size_t PPC32GlinkSection::getSize() const { // That said, the 2-PLT scheme is a part of the ABI, debuggers and other tools // depend on it, so we implement the ABI. 
IBTPltSection::IBTPltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".plt") {} + : SyntheticSection(ctx, ".plt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + 16) {} void IBTPltSection::writeTo(uint8_t *buf) { ctx.target->writeIBTPlt(buf, ctx.in.plt->getNumEntries()); @@ -2752,8 +2750,8 @@ size_t IBTPltSection::getSize() const { bool IBTPltSection::isNeeded() const { return ctx.in.plt->getNumEntries() > 0; } RelroPaddingSection::RelroPaddingSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, - ".relro_padding") {} + : SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE, + 1) {} // The string hash function for .gdb_index. static uint32_t computeGdbHash(StringRef s) { @@ -2766,7 +2764,7 @@ static uint32_t computeGdbHash(StringRef s) { // 4-byte alignment ensures that values in the hash lookup table and the name // table are aligned. DebugNamesBaseSection::DebugNamesBaseSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_PROGBITS, 4, ".debug_names") {} + : SyntheticSection(ctx, ".debug_names", SHT_PROGBITS, 0, 4) {} // Get the size of the .debug_names section header in bytes for DWARF32: static uint32_t getDebugNamesHeaderSize(uint32_t augmentationStringSize) { @@ -2872,7 +2870,7 @@ void DebugNamesBaseSection::parseDebugNames( nd.hdr = ni.getHeader(); if (nd.hdr.Format != DwarfFormat::DWARF32) { Err(ctx) << namesSec.sec - << Twine(": found DWARF64, which is currently unsupported"); + << ": found DWARF64, which is currently unsupported"; return; } if (nd.hdr.Version != 5) { @@ -2882,8 +2880,7 @@ void DebugNamesBaseSection::parseDebugNames( uint32_t dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32); DWARFDebugNames::DWARFDebugNamesOffsets locs = ni.getOffsets(); if (locs.EntriesBase > namesExtractor.getData().size()) { - Err(ctx) << namesSec.sec - << Twine(": entry pool start is beyond end of section"); + Err(ctx) << namesSec.sec << ": entry pool start is beyond end 
of section"; return; } @@ -2964,7 +2961,7 @@ void DebugNamesBaseSection::computeHdrAndAbbrevTable( // ForeignTypeUnitCount are left as 0. if (nd.hdr.LocalTypeUnitCount || nd.hdr.ForeignTypeUnitCount) Warn(ctx) << inputChunk.section.sec - << Twine(": type units are not implemented"); + << ": type units are not implemented"; // If augmentation strings are not identical, use an empty string. if (i == 0) { hdr.AugmentationStringSize = nd.hdr.AugmentationStringSize; @@ -3358,7 +3355,7 @@ template void DebugNamesSection::writeTo(uint8_t *buf) { } GdbIndexSection::GdbIndexSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_PROGBITS, 1, ".gdb_index") {} + : SyntheticSection(ctx, ".gdb_index", SHT_PROGBITS, 0, 1) {} // Returns the desired size of an on-disk hash table for a .gdb_index section. // There's a tradeoff between size and collision rate. We aim 75% utilization. @@ -3654,7 +3651,7 @@ void GdbIndexSection::writeTo(uint8_t *buf) { bool GdbIndexSection::isNeeded() const { return !chunks.empty(); } EhFrameHeader::EhFrameHeader(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_PROGBITS, 4, ".eh_frame_hdr") {} + : SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {} void EhFrameHeader::writeTo(uint8_t *buf) { // Unlike most sections, the EhFrameHeader section is written while writing @@ -3699,8 +3696,8 @@ bool EhFrameHeader::isNeeded() const { } VersionDefinitionSection::VersionDefinitionSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), - ".gnu.version_d") {} + : SyntheticSection(ctx, ".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC, + sizeof(uint32_t)) {} StringRef VersionDefinitionSection::getFileDefName() { if (!getPartition(ctx).name.empty()) @@ -3761,8 +3758,8 @@ size_t VersionDefinitionSection::getSize() const { // .gnu.version is a table where each entry is 2 byte long. 
VersionTableSection::VersionTableSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), - ".gnu.version") { + : SyntheticSection(ctx, ".gnu.version", SHT_GNU_versym, SHF_ALLOC, + sizeof(uint16_t)) { this->entsize = 2; } @@ -3812,8 +3809,8 @@ void elf::addVerneed(Ctx &ctx, Symbol &ss) { template VersionNeedSection::VersionNeedSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), - ".gnu.version_r") {} + : SyntheticSection(ctx, ".gnu.version_r", SHT_GNU_verneed, SHF_ALLOC, + sizeof(uint32_t)) {} template void VersionNeedSection::finalizeContents() { for (SharedFile *f : ctx.sharedFiles) { @@ -4020,12 +4017,12 @@ void elf::combineEhSections(Ctx &ctx) { } MipsRldMapSection::MipsRldMapSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - ctx.arg.wordsize, ".rld_map") {} + : SyntheticSection(ctx, ".rld_map", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + ctx.arg.wordsize) {} ARMExidxSyntheticSection::ARMExidxSyntheticSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, - ctx.arg.wordsize, ".ARM.exidx") {} + : SyntheticSection(ctx, ".ARM.exidx", SHT_ARM_EXIDX, + SHF_ALLOC | SHF_LINK_ORDER, ctx.arg.wordsize) {} static InputSection *findExidxSection(InputSection *isec) { for (InputSection *d : isec->dependentSections) @@ -4250,8 +4247,9 @@ bool ARMExidxSyntheticSection::isNeeded() const { } ThunkSection::ThunkSection(Ctx &ctx, OutputSection *os, uint64_t off) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, - ctx.arg.emachine == EM_PPC64 ? 16 : 4, ".text.thunk") { + : SyntheticSection(ctx, ".text.thunk", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, + ctx.arg.emachine == EM_PPC64 ? 
16 : 4) { this->parent = os; this->outSecOff = off; } @@ -4294,7 +4292,7 @@ bool ThunkSection::assignOffsets() { } PPC32Got2Section::PPC32Got2Section(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 4, ".got2") {} + : SyntheticSection(ctx, ".got2", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, 4) {} bool PPC32Got2Section::isNeeded() const { // See the comment below. This is not needed if there is no other @@ -4327,9 +4325,9 @@ void PPC32Got2Section::finalizeContents() { // position-independent code the section has type SHT_NOBITS since it will be // allocated and filled in by the dynamic linker. PPC64LongBranchTargetSection::PPC64LongBranchTargetSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, - ctx.arg.isPic ? SHT_NOBITS : SHT_PROGBITS, 8, - ".branch_lt") {} + : SyntheticSection(ctx, ".branch_lt", + ctx.arg.isPic ? SHT_NOBITS : SHT_PROGBITS, + SHF_ALLOC | SHF_WRITE, 8) {} uint64_t PPC64LongBranchTargetSection::getEntryVA(const Symbol *sym, int64_t addend) { @@ -4393,7 +4391,7 @@ static uint8_t getAbiVersion(Ctx &ctx) { uint8_t ver = ctx.objectFiles[0]->abiVersion; for (InputFile *file : ArrayRef(ctx.objectFiles).slice(1)) if (file->abiVersion != ver) - ErrAlways(ctx) << "incompatible ABI version: " << file; + Err(ctx) << "incompatible ABI version: " << file; return ver; } @@ -4442,7 +4440,7 @@ template void elf::writePhdrs(uint8_t *buf, Partition &part) { template PartitionElfHeaderSection::PartitionElfHeaderSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_LLVM_PART_EHDR, 1, "") {} + : SyntheticSection(ctx, "", SHT_LLVM_PART_EHDR, SHF_ALLOC, 1) {} template size_t PartitionElfHeaderSection::getSize() const { @@ -4460,7 +4458,7 @@ void PartitionElfHeaderSection::writeTo(uint8_t *buf) { template PartitionProgramHeadersSection::PartitionProgramHeadersSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_LLVM_PART_PHDR, 1, ".phdrs") {} + : SyntheticSection(ctx, ".phdrs", SHT_LLVM_PART_PHDR, SHF_ALLOC, 1) {} template 
size_t PartitionProgramHeadersSection::getSize() const { @@ -4473,7 +4471,7 @@ void PartitionProgramHeadersSection::writeTo(uint8_t *buf) { } PartitionIndexSection::PartitionIndexSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_PROGBITS, 4, ".rodata") {} + : SyntheticSection(ctx, ".rodata", SHT_PROGBITS, SHF_ALLOC, 4) {} size_t PartitionIndexSection::getSize() const { return 12 * (ctx.partitions.size() - 1); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index cf178411e1eac..4b643e8633551 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -137,8 +137,8 @@ class GotSection final : public SyntheticSection { class GnuStackSection : public SyntheticSection { public: GnuStackSection(Ctx &ctx) - : SyntheticSection(ctx, 0, llvm::ELF::SHT_PROGBITS, 1, - ".note.GNU-stack") {} + : SyntheticSection(ctx, ".note.GNU-stack", llvm::ELF::SHT_PROGBITS, 0, + 1) {} void writeTo(uint8_t *buf) override {} size_t getSize() const override { return 0; } }; @@ -177,7 +177,9 @@ class BssSection final : public SyntheticSection { bool isNeeded() const override { return size != 0; } size_t getSize() const override { return size; } - static bool classof(const SectionBase *s) { return s->bss; } + static bool classof(const SectionBase *s) { + return isa(s) && cast(s)->bss; + } uint64_t size; }; @@ -1084,7 +1086,7 @@ class MergeSyntheticSection : public SyntheticSection { protected: MergeSyntheticSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags, uint32_t addralign) - : SyntheticSection(ctx, flags, type, addralign, name) {} + : SyntheticSection(ctx, name, type, flags, addralign) {} }; class MergeTailSection final : public MergeSyntheticSection { @@ -1396,8 +1398,8 @@ class PartitionIndexSection final : public SyntheticSection { class MemtagAndroidNote final : public SyntheticSection { public: MemtagAndroidNote(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, - /*alignment=*/4, 
".note.android.memtag") {} + : SyntheticSection(ctx, ".note.android.memtag", llvm::ELF::SHT_NOTE, + llvm::ELF::SHF_ALLOC, /*addralign=*/4) {} void writeTo(uint8_t *buf) override; size_t getSize() const override; }; @@ -1405,8 +1407,8 @@ class MemtagAndroidNote final : public SyntheticSection { class PackageMetadataNote final : public SyntheticSection { public: PackageMetadataNote(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, - /*alignment=*/4, ".note.package") {} + : SyntheticSection(ctx, ".note.package", llvm::ELF::SHT_NOTE, + llvm::ELF::SHF_ALLOC, /*addralign=*/4) {} void writeTo(uint8_t *buf) override; size_t getSize() const override; }; @@ -1414,9 +1416,9 @@ class PackageMetadataNote final : public SyntheticSection { class MemtagGlobalDescriptors final : public SyntheticSection { public: MemtagGlobalDescriptors(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, + : SyntheticSection(ctx, ".memtag.globals.dynamic", llvm::ELF::SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC, - /*alignment=*/4, ".memtag.globals.dynamic") {} + llvm::ELF::SHF_ALLOC, /*addralign=*/4) {} void writeTo(uint8_t *buf) override; // The size of the section is non-computable until all addresses are // synthetized, because the section's contents contain a sorted diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 203252dbac122..63d813e550f93 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -84,7 +84,7 @@ void elf::setTarget(Ctx &ctx) { case EM_X86_64: return setX86_64TargetInfo(ctx); default: - Fatal(ctx) << "unsupported e_machine value: " << Twine(ctx.arg.emachine); + Fatal(ctx) << "unsupported e_machine value: " << ctx.arg.emachine; } } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index ce42d3624a8f5..fd1e5d33c438a 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -211,8 +211,8 @@ static inline std::string getErrorLoc(Ctx &ctx, const uint8_t *loc) { void processArmCmseSymbols(Ctx &); template uint32_t calcMipsEFlags(Ctx &); -uint8_t 
getMipsFpAbiFlag(Ctx &, uint8_t oldFlag, uint8_t newFlag, - llvm::StringRef fileName); +uint8_t getMipsFpAbiFlag(Ctx &, InputFile *file, uint8_t oldFlag, + uint8_t newFlag); bool isMipsN32Abi(Ctx &, const InputFile &f); bool isMicroMips(Ctx &); bool isMipsR6(Ctx &); @@ -292,7 +292,7 @@ inline void checkAlignment(Ctx &ctx, uint8_t *loc, uint64_t v, int n, if ((v & (n - 1)) != 0) Err(ctx) << getErrorLoc(ctx, loc) << "improper alignment for relocation " << rel.type << ": 0x" << llvm::utohexstr(v) - << " is not aligned to " << Twine(n) << " bytes"; + << " is not aligned to " << n << " bytes"; } // Endianness-aware read/write. diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 67497bad7cb23..a7fbdc0790704 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1572,8 +1572,8 @@ template void Writer::finalizeAddressDependentContent() { if (osec->addr % osec->addralign != 0) Warn(ctx) << "address (0x" << Twine::utohexstr(osec->addr) << ") of section " << osec->name - << " is not a multiple of alignment (" - << Twine(osec->addralign) << ")"; + << " is not a multiple of alignment (" << osec->addralign + << ")"; } // Sizes are no longer allowed to grow, so all allowable spills have been @@ -2794,7 +2794,7 @@ template void Writer::openFile() { if (fileSize != size_t(fileSize) || maxSize < fileSize) { std::string msg; raw_string_ostream s(msg); - s << "output file too large: " << Twine(fileSize) << " bytes\n" + s << "output file too large: " << fileSize << " bytes\n" << "section sizes:\n"; for (OutputSection *os : ctx.outputSections) s << os->name << ' ' << os->size << "\n"; diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index ee11f17893971..79e20be2bb6be 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -71,6 +71,7 @@ #include "lld/Common/LLVM.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Error.h" #include 
"llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -151,20 +152,21 @@ void message(const Twine &msg, llvm::raw_ostream &s = outs()); void warn(const Twine &msg); uint64_t errorCount(); -enum class DiagLevel { Log, Msg, Warn, Err, Fatal }; +enum class DiagLevel { None, Log, Msg, Warn, Err, Fatal }; // A class that synchronizes thread writing to the same stream similar // std::osyncstream. class SyncStream { ErrorHandler &e; DiagLevel level; - std::string buf; + llvm::SmallString<0> buf; public: - mutable llvm::raw_string_ostream os{buf}; + mutable llvm::raw_svector_ostream os{buf}; SyncStream(ErrorHandler &e, DiagLevel level) : e(e), level(level) {} SyncStream(SyncStream &&o) : e(o.e), level(o.level), buf(std::move(o.buf)) {} ~SyncStream(); + StringRef str() { return os.str(); } }; [[noreturn]] void exitLld(int val); diff --git a/lld/test/COFF/arm64ec.test b/lld/test/COFF/arm64ec.test index e50b14ce0184c..75288e97e598d 100644 --- a/lld/test/COFF/arm64ec.test +++ b/lld/test/COFF/arm64ec.test @@ -4,6 +4,7 @@ RUN: split-file %s %t.dir && cd %t.dir RUN: llvm-mc -filetype=obj -triple=aarch64-windows arm64-data-sym.s -o arm64-data-sym.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-data-sym.s -o arm64ec-data-sym.obj RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-data-sym.s -o x86_64-data-sym.obj +RUN: llvm-mc -filetype=obj -triple=i686-windows x86_64-data-sym.s -o i686-data-sym.obj RUN: llvm-cvtres -machine:arm64x -out:arm64x-resource.obj %S/Inputs/resource.res RUN: lld-link -out:test.dll -machine:arm64ec arm64ec-data-sym.obj -dll -noentry @@ -46,6 +47,26 @@ RUN: not lld-link -out:test.dll -machine:arm64 arm64-data-sym.obj x86_64-data-sy RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s INCOMPAT3: lld-link: error: x86_64-data-sym.obj: machine type x64 conflicts with arm64 +arm64ec machine type can't be inferred, it must be specified explicitly. 
+RUN: not lld-link -out:test.dll arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s +INCOMPAT4: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec is ambiguous and cannot be inferred, use /machine:arm64ec or /machine:arm64x + +RUN: not lld-link -out:test.dll x86_64-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + +RUN: not lld-link -out:test.dll arm64-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + +RUN: not lld-link -out:test.dll i686-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT5 %s +INCOMPAT5: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec conflicts with x86 + +arm64x can be inferred and when mixed with ARM64, the first one wins +RUN: lld-link -out:test.dll -dll -noentry arm64x-resource.obj arm64-data-sym.obj x86_64-data-sym.obj arm64ec-data-sym.obj +RUN: not lld-link -out:test.dll -dll -noentry arm64-data-sym.obj arm64x-resource.obj x86_64-data-sym.obj 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s +RUN: not lld-link -out:test.dll -dll -noentry arm64-data-sym.obj arm64x-resource.obj arm64ec-data-sym.obj 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + #--- arm64ec-data-sym.s .data .globl arm64ec_data_sym diff --git a/lld/test/ELF/arm-rwpi-debug-relocs.s b/lld/test/ELF/arm-rwpi-debug-relocs.s new file mode 100644 index 0000000000000..2bb968d4afa9a --- /dev/null +++ b/lld/test/ELF/arm-rwpi-debug-relocs.s @@ -0,0 +1,54 @@ +/// Test that R_ARM_SBREL32 relocations in debug info are relocated as if the +/// static base register (r9) is zero. Real DWARF info will use an expression to +/// add this to the real value of the static base at runtime. 
+ +// REQUIRES: arm +// RUN: rm -rf %t && split-file %s %t && cd %t + +// RUN: llvm-mc -filetype=obj -triple=armv7a asm.s -o obj.o +// RUN: ld.lld -T lds.ld obj.o -o exe.elf 2>&1 | FileCheck %s --implicit-check-not=warning: --allow-empty +// RUN: llvm-objdump -D exe.elf | FileCheck --check-prefix=DISASM %s + +// DISASM-LABEL: : +// DISASM-NEXT: 1000: 0000002a + +// DISASM-LABEL: : +// DISASM-NEXT: 2000: 000004d2 + +// DISASM-LABEL: <.debug_something>: +// DISASM-NEXT: 0: 00001000 +// DISASM-NEXT: ... +// DISASM-NEXT: 104: 00002000 + +//--- lds.ld +SECTIONS { + data1 0x1000 : { *(data1) } + data2 0x2000 : { *(data2) } +} + +//--- asm.s + .text + .type _start,%function + .globl _start +_start: + bx lr + .size _start, .-_start + + .section data1, "aw", %progbits + .type rw,%object + .globl rw +rw: + .long 42 + .size rw, 4 + + .section data2, "aw", %progbits + .type rw2,%object + .globl rw2 +rw2: + .long 1234 + .size rw2, 4 + + .section .debug_something, "", %progbits + .long rw(sbrel) + .space 0x100 + .long rw2(sbrel) diff --git a/lld/test/ELF/incompatible.s b/lld/test/ELF/incompatible.s index 39c25106f4d72..0d25acd857610 100644 --- a/lld/test/ELF/incompatible.s +++ b/lld/test/ELF/incompatible.s @@ -6,11 +6,11 @@ // RUN: not ld.lld %ta.o %tb.o -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=A-AND-B %s -// A-AND-B: b.o is incompatible with {{.*}}a.o +// A-AND-B: error: {{.*}}b.o is incompatible with {{.*}}a.o -// RUN: not ld.lld %tb.o %tc.o -o /dev/null 2>&1 | \ +// RUN: ld.lld --noinhibit-exec %tb.o %tc.o -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=B-AND-C %s -// B-AND-C: c.o is incompatible with {{.*}}b.o +// B-AND-C: warning: {{.*}}c.o is incompatible with {{.*}}b.o // RUN: not ld.lld %ta.o %ti686.so -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=A-AND-SO %s @@ -69,8 +69,8 @@ // RUN: rm -f %t.a // RUN: llvm-ar rc %t.a %tc.o // RUN: llvm-mc -filetype=obj -triple=i686-linux %s -o %td.o -// RUN: not ld.lld %t.a %td.o 2>&1 -o /dev/null | 
FileCheck --check-prefix=ARCHIVE %s -// ARCHIVE: {{.*}}d.o is incompatible +// RUN: ld.lld --noinhibit-exec %t.a %td.o 2>&1 -o /dev/null | FileCheck --check-prefix=ARCHIVE %s +// ARCHIVE: warning: {{.*}}d.o is incompatible{{$}} .global _start _start: .data diff --git a/lld/test/ELF/linkerscript/symbol-location.s b/lld/test/ELF/linkerscript/symbol-location.s index 4620982bf3f20..fd5cc9de048f1 100644 --- a/lld/test/ELF/linkerscript/symbol-location.s +++ b/lld/test/ELF/linkerscript/symbol-location.s @@ -2,6 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o # RUN: echo 'foo = _start;' > %t.script # RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared --defsym 'foo = _start' %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2 ## Here we check that symbol 'foo' location is reported properly. @@ -9,6 +10,10 @@ # CHECK: >>> defined in {{.*}}.script:1 # CHECK: >>> referenced by {{.*}}.o:(.text+0x1) +# CHECK2: error: relocation R_X86_64_PC32 cannot be used against symbol 'foo' +# CHECK2: >>> defined in --defsym{{$}} +# CHECK2: >>> referenced by {{.*}}.o:(.text+0x1) + .text .globl _start _start: diff --git a/lld/test/ELF/merge-addr.s b/lld/test/ELF/merge-addr.s new file mode 100644 index 0000000000000..b36619788083b --- /dev/null +++ b/lld/test/ELF/merge-addr.s @@ -0,0 +1,63 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld %t.o -o %t.so -shared --section-start cst4=0x800 --section-start str=0x1000 +# RUN: llvm-readelf -r -S %t.so | FileCheck %s +# RUN: llvm-objdump -s %t.so | FileCheck %s --check-prefix=OBJDUMP + +# RUN: ld.lld %t.o -o %t0.so -O0 -shared --section-start cst4=0x800 --section-start str=0x1000 +# RUN: llvm-objdump -s %t0.so | FileCheck %s --check-prefix=OBJDUMP0 +# RUN: ld.lld %t.o -o %t2.so -O2 -shared --section-start cst4=0x800 --section-start str=0x1000 +# RUN: llvm-objdump -s %t2.so | FileCheck %s --check-prefix=OBJDUMP2 + +# CHECK: Name Type 
Address Off Size ES Flg Lk Inf Al +# CHECK: cst4 PROGBITS 0000000000000800 000800 000004 04 AM 0 0 1 +# CHECK-NEXT: str PROGBITS 0000000000001000 001000 000009 01 AMS 0 0 1 + +# CHECK: Relocation section '.rela.dyn' +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: R_X86_64_RELATIVE 802 +# CHECK-NEXT: R_X86_64_RELATIVE 1000 +# CHECK-NEXT: R_X86_64_RELATIVE 1006 +# CHECK-NEXT: R_X86_64_RELATIVE 1002 +# CHECK-EMPTY: + +# OBJDUMP: Contents of section str: +# OBJDUMP-NEXT: 1000 61006162 63006263 00 a.abc.bc. +# OBJDUMP: Contents of section .data: +# OBJDUMP-NEXT: 00000000 00000000 00000000 00000000 ................ +# OBJDUMP-NEXT: 00000000 00000000 ........ +# OBJDUMP: Contents of section .bar: +# OBJDUMP-NEXT: 0000 00080000 00000000 00080000 00000000 ................ + +# OBJDUMP0: Contents of section cst4: +# OBJDUMP0-NEXT: 0800 2a000000 2a000000 *...*... +# OBJDUMP0-NEXT: Contents of section str: +# OBJDUMP0-NEXT: 1000 61626300 61006263 00626300 abc.a.bc.bc. + +# OBJDUMP2: Contents of section cst4: +# OBJDUMP2-NEXT: 0800 2a000000 *... +# OBJDUMP2-NEXT: Contents of section str: +# OBJDUMP2-NEXT: 1000 61626300 6100 abc.a. 
+ +.section cst4,"aM",@progbits,4 +.long 42 +.long 42 + +.section str,"aMS",@progbits,1 +abc: +.asciz "abc" +a: +.asciz "a" +bc: +.asciz "bc" +.asciz "bc" + +.data +.quad cst4 + 6 +.quad a +.quad bc +.quad abc + +.section .bar +.quad cst4 +.quad cst4 + 4 diff --git a/lld/test/ELF/merge-reloc.s b/lld/test/ELF/merge-reloc.s index a343d5679b58e..75a48099f5089 100644 --- a/lld/test/ELF/merge-reloc.s +++ b/lld/test/ELF/merge-reloc.s @@ -1,91 +1,57 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -# RUN: ld.lld %t.o -r -o %t-rel -# RUN: llvm-readobj -S --section-data %t-rel | FileCheck %s +# RUN: ld.lld %t.o -r -o %t.ro +# RUN: llvm-readelf -S %t.ro | FileCheck %s +# RUN: llvm-objdump -s %t.ro | FileCheck %s --check-prefix=OBJDUMP -# When linker generates a relocatable object it does string merging in the same -# way as for regular link. It should keep SHF_MERGE flag and set proper sh_entsize -# value so that final link can perform the final merging optimization. 
+# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=CHECK-PDE -# CHECK: Section { -# CHECK: Index: -# CHECK: Name: .rodata.1 ( -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_MERGE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: -# CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 4 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 4 -# CHECK-NEXT: EntrySize: 4 -# CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 42000000 -# CHECK-NEXT: ) -# CHECK-NEXT: } -# CHECK: Section { -# CHECK: Index: -# CHECK: Name: .rodata.2 ( -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_MERGE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: -# CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 8 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 8 -# CHECK-NEXT: EntrySize: 8 -# CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 42000000 42000000 -# CHECK-NEXT: ) -# CHECK-NEXT: } -# CHECK: Section { -# CHECK: Index: -# CHECK: Name: .data -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_WRITE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: -# CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 16 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 1 -# CHECK-NEXT: EntrySize: 0 -# CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 42000000 42000000 42000000 42000000 -# CHECK-NEXT: ) -# CHECK-NEXT: } +# CHECK: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .text PROGBITS 0000000000000000 000040 000000 00 AX 0 0 4 +# CHECK-NEXT: [ 2] .rodata.1 PROGBITS 0000000000000000 000040 000004 04 AM 0 0 4 +# CHECK-NEXT: [ 3] .rodata.2 PROGBITS 0000000000000000 000048 000008 08 AM 0 0 8 +# CHECK-NEXT: [ 4] .rodata.cst8 PROGBITS 0000000000000000 000050 000010 08 AM 0 0 1 +# CHECK-NEXT: [ 5] .rela.rodata.cst8 
RELA 0000000000000000 000068 000030 18 I 9 4 8 +# CHECK-NEXT: [ 6] .cst4 PROGBITS 0000000000000000 000060 000008 04 AM 0 0 1 +# CHECK-NEXT: [ 7] .rela.cst4 RELA 0000000000000000 000098 000030 18 I 9 6 8 + +# OBJDUMP: Contents of section .rodata.1: +# OBJDUMP-NEXT: 0000 42000000 B... +# OBJDUMP-NEXT: Contents of section .rodata.2: +# OBJDUMP-NEXT: 0000 42000000 42000000 B...B... +# OBJDUMP-NEXT: Contents of section .rodata.cst8: +# OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 ................ +# OBJDUMP: Contents of section .cst4: +# OBJDUMP-NEXT: 0000 00000000 00000000 ........ + +# CHECK-PDE: [ 2] .cst4 PROGBITS 0000000000200140 000140 000008 04 AM 0 0 1 - .section .rodata.1,"aM",@progbits,4 - .align 4 - .global foo foo: - .long 0x42 - .long 0x42 - .long 0x42 - .section .rodata.2,"aM",@progbits,8 - .align 8 - .global bar -bar: - .long 0x42 - .long 0x42 - .long 0x42 - .long 0x42 +.section .rodata.1,"aM",@progbits,4 +.align 4 +.long 0x42 +.long 0x42 +.long 0x42 + +.section .rodata.2,"aM",@progbits,8 +.align 8 +.long 0x42 +.long 0x42 +.long 0x42 +.long 0x42 + +## Test that we keep a SHT_REL[A] section which relocates a SHF_MERGE section +## in -r mode. The relocated SHF_MERGE section is handled as non-mergeable. +.section .rodata.cst8,"aM",@progbits,8,unique,0 +.quad foo + +.section .rodata.cst8,"aM",@progbits,8,unique,1 +.quad foo - .data - .global gar -zed: - .long 0x42 - .long 0x42 - .long 0x42 - .long 0x42 +.section .cst4,"aM",@progbits,4,unique,0 +.long foo +.section .cst4,"aM",@progbits,4,unique,1 +.long foo diff --git a/lld/test/ELF/merge-relocatable.s b/lld/test/ELF/merge-relocatable.s deleted file mode 100644 index d376f4ca0b422..0000000000000 --- a/lld/test/ELF/merge-relocatable.s +++ /dev/null @@ -1,23 +0,0 @@ -# REQUIRES: x86 - -## Test that we keep a SHT_REL[A] section which relocates a SHF_MERGE section -## in -r mode. The relocated SHF_MERGE section is handled as non-mergeable. 
- -# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: ld.lld -r %t.o -o %t -# RUN: llvm-readobj -S %t | FileCheck %s - -# CHECK: Name: .rodata.cst8 -# CHECK-NOT: } -# CHECK: Size: 16 -# CHECK: Name: .rela.rodata.cst8 -# CHECK-NOT: } -# CHECK: Size: 48 - -foo: - -.section .rodata.cst8,"aM",@progbits,8,unique,0 -.quad foo - -.section .rodata.cst8,"aM",@progbits,8,unique,1 -.quad foo diff --git a/lld/test/ELF/merge-shared-str.s b/lld/test/ELF/merge-shared-str.s deleted file mode 100644 index 9ecdd64e97310..0000000000000 --- a/lld/test/ELF/merge-shared-str.s +++ /dev/null @@ -1,28 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -O3 -// RUN: llvm-readobj -r -S %t.so | FileCheck %s - - - .section foo,"aMS",@progbits,1 - .asciz "bar" - .asciz "ar" - - .data - .quad foo + 4 - - -// CHECK: Name: foo -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: SHF_STRINGS -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x260 - -// CHECK: Relocations [ -// CHECK-NEXT: Section ({{.*}}) .rela.dyn { -// CHECK-NEXT: 0x{{.*}} R_X86_64_RELATIVE - 0x261 -// CHECK-NEXT: } -// CHECK-NEXT: ] diff --git a/lld/test/ELF/merge-shared.s b/lld/test/ELF/merge-shared.s deleted file mode 100644 index 12cb738c1077e..0000000000000 --- a/lld/test/ELF/merge-shared.s +++ /dev/null @@ -1,26 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -// RUN: llvm-readobj -r -S %t.so | FileCheck %s - - .section foo,"aM",@progbits,4 - .long 42 - .long 42 - - .data - .quad foo + 6 - - -// CHECK: Name: foo -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x260 - -// CHECK: Relocations [ -// CHECK-NEXT: Section ({{.*}}) .rela.dyn { -// CHECK-NEXT: 0x{{.*}} R_X86_64_RELATIVE - 0x262 
-// CHECK-NEXT: } -// CHECK-NEXT: ] diff --git a/lld/test/ELF/merge-string.s b/lld/test/ELF/merge-string.s deleted file mode 100644 index 549195d5cf805..0000000000000 --- a/lld/test/ELF/merge-string.s +++ /dev/null @@ -1,105 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld -O 2 %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck %s -// RUN: ld.lld -O 1 %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck --check-prefix=NOTAIL %s -// RUN: ld.lld -O 0 %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck --check-prefix=NOMERGE %s - - .section .rodata1,"aMS",@progbits,1 - .asciz "abc" -foo: - .ascii "a" -bar: - .asciz "bc" - .asciz "bc" - - .section .rodata2,"aMS",@progbits,2 - .p2align 1 -zed: - .short 20 - .short 0 - -// CHECK: Name: .rodata1 -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: SHF_STRINGS -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x20D -// CHECK-NEXT: Offset: 0x20D -// CHECK-NEXT: Size: 4 -// CHECK-NEXT: Link: 0 -// CHECK-NEXT: Info: 0 -// CHECK-NEXT: AddressAlignment: 1 -// CHECK-NEXT: EntrySize: 1 -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 61626300 |abc.| -// CHECK-NEXT: ) - -// NOTAIL: Name: .rodata1 -// NOTAIL-NEXT: Type: SHT_PROGBITS -// NOTAIL-NEXT: Flags [ -// NOTAIL-NEXT: SHF_ALLOC -// NOTAIL-NEXT: SHF_MERGE -// NOTAIL-NEXT: SHF_STRINGS -// NOTAIL-NEXT: ] -// NOTAIL-NEXT: Address: 0x20D -// NOTAIL-NEXT: Offset: 0x20D -// NOTAIL-NEXT: Size: 7 -// NOTAIL-NEXT: Link: 0 -// NOTAIL-NEXT: Info: 0 -// NOTAIL-NEXT: AddressAlignment: 1 -// NOTAIL-NEXT: EntrySize: 1 -// NOTAIL-NEXT: SectionData ( -// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.| -// NOTAIL-NEXT: ) - -// NOMERGE: Name: .rodata1 -// NOMERGE-NEXT: Type: SHT_PROGBITS -// NOMERGE-NEXT: Flags [ -// NOMERGE-NEXT: SHF_ALLOC -// NOMERGE-NEXT: 
SHF_MERGE -// NOMERGE-NEXT: SHF_STRINGS -// NOMERGE-NEXT: ] -// NOMERGE-NEXT: Address: 0x20D -// NOMERGE-NEXT: Offset: 0x20D -// NOMERGE-NEXT: Size: 11 -// NOMERGE-NEXT: Link: 0 -// NOMERGE-NEXT: Info: 0 -// NOMERGE-NEXT: AddressAlignment: 1 -// NOMERGE-NEXT: EntrySize: 1 -// NOMERGE-NEXT: SectionData ( -// NOMERGE-NEXT: 0000: 61626300 61626300 626300 |abc.abc.bc.| -// NOMERGE-NEXT: ) - -// CHECK: Name: .rodata2 -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: SHF_STRINGS -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x212 -// CHECK-NEXT: Offset: 0x212 -// CHECK-NEXT: Size: 4 -// CHECK-NEXT: Link: 0 -// CHECK-NEXT: Info: 0 -// CHECK-NEXT: AddressAlignment: 2 -// CHECK-NEXT: EntrySize: 2 -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 |....| -// CHECK-NEXT: ) - - -// CHECK: Name: foo -// CHECK-NEXT: Value: 0x20D - -// CHECK: Name: bar -// CHECK-NEXT: Value: 0x20E - -// CHECK: Name: zed -// CHECK-NEXT: Value: 0x212 -// CHECK-NEXT: Size: 0 diff --git a/lld/test/ELF/merge-to-non-alloc.s b/lld/test/ELF/merge-to-non-alloc.s deleted file mode 100644 index 17e826ed5bb0c..0000000000000 --- a/lld/test/ELF/merge-to-non-alloc.s +++ /dev/null @@ -1,33 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck %s - -// CHECK: Name: .bar -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: ] -// CHECK-NEXT: Address: -// CHECK-NEXT: Offset: -// CHECK-NEXT: Size: 16 -// CHECK-NEXT: Link: -// CHECK-NEXT: Info: -// CHECK-NEXT: AddressAlignment: -// CHECK-NEXT: EntrySize: -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 10020000 00000000 18020000 00000000 | -// CHECK-NEXT: ) - -// CHECK: Name: foo -// CHECK-NEXT: Value: 0x210 - - .section .foo,"aM",@progbits,4 - .align 4 - .global foo - .hidden foo -foo: - .long 0x42 - - .section .bar - 
.quad foo - .quad foo + 8 diff --git a/lld/test/ELF/reproduce.s b/lld/test/ELF/reproduce.s index 8818a9e35f403..29dc109d5a412 100644 --- a/lld/test/ELF/reproduce.s +++ b/lld/test/ELF/reproduce.s @@ -76,11 +76,12 @@ ## Check that directory path is stripped from -o # RUN: mkdir -p %t.dir/build4/a/b/c # RUN: cd %t.dir -# RUN: ld.lld build1/foo.o -o build4/a/b/c/bar -Map build4/map --print-archive-stats=build4/stats \ +# RUN: ld.lld build1/foo.o -o build4/a/b/c/bar -Map build4/map --dependency-file=build4/bar.d --print-archive-stats=build4/stats \ # RUN: --why-extract=build4/why -shared --as-needed --reproduce=repro4.tar # RUN: tar xOf repro4.tar repro4/response.txt | FileCheck %s --check-prefix=RSP4 # RSP4: -o bar # RSP4-NEXT: -Map map +# RSP4-NEXT: --dependency-file bar.d # RSP4-NEXT: --print-archive-stats=stats # RSP4-NEXT: --why-extract=why diff --git a/lldb/docs/use/aarch64-linux.md b/lldb/docs/use/aarch64-linux.md index 70432f57857a5..393838dc0bb4f 100644 --- a/lldb/docs/use/aarch64-linux.md +++ b/lldb/docs/use/aarch64-linux.md @@ -17,7 +17,7 @@ In LLDB you will be able to see the following new registers: * `z0-z31` vector registers, each one has size equal to the vector length. * `p0-p15` predicate registers, each one containing 1 bit per byte in the vector - length. Making each one vector length / 8 sized. + length. So each one is `vector length in bits / 8` bits. * `ffr` the first fault register, same size as a predicate register. * `vg`, the vector length in "granules". Each granule is 8 bytes. diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index b8c53a474ba6b..a184e6dd891af 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1380,6 +1380,8 @@ class Process : public std::enable_shared_from_this, virtual bool GetProcessInfo(ProcessInstanceInfo &info); + virtual lldb_private::UUID FindModuleUUID(const llvm::StringRef path); + /// Get the exit status for a process. 
/// /// \return diff --git a/lldb/packages/Python/lldbsuite/test/lldbutil.py b/lldb/packages/Python/lldbsuite/test/lldbutil.py index 660a3c085a908..07b5f8cc7d900 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbutil.py @@ -1158,17 +1158,6 @@ def GetModuleName(i): return list(map(GetModuleName, list(range(thread.GetNumFrames())))) -def get_stack_frames(thread): - """ - Returns a sequence of stack frames for this thread. - """ - - def GetStackFrame(i): - return thread.GetFrameAtIndex(i) - - return list(map(GetStackFrame, list(range(thread.GetNumFrames())))) - - def print_stacktrace(thread, string_buffer=False): """Prints a simple stack trace of this thread.""" diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp index 3c6c6bd365706..acc84dbf016fb 100644 --- a/lldb/source/Core/DynamicLoader.cpp +++ b/lldb/source/Core/DynamicLoader.cpp @@ -157,11 +157,9 @@ DynamicLoader::GetSectionListFromModule(const ModuleSP module) const { ModuleSP DynamicLoader::FindModuleViaTarget(const FileSpec &file) { Target &target = m_process->GetTarget(); ModuleSpec module_spec(file, target.GetArchitecture()); - ModuleSpec module_spec_from_process; - // Process may be able to augment the module_spec with UUID, e.g. ELF core. - if (m_process->GetModuleSpec(file, target.GetArchitecture(), - module_spec_from_process)) { - module_spec = module_spec_from_process; + if (UUID uuid = m_process->FindModuleUUID(file.GetPath())) { + // Process may be able to augment the module_spec with UUID, e.g. ELF core. 
+ module_spec.GetUUID() = uuid; } if (ModuleSP module_sp = target.GetImages().FindFirstModule(module_spec)) diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index 57b12f07b5e0b..b3916cc913f7d 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -286,20 +286,12 @@ void ProcessElfCore::UpdateBuildIdForNTFileEntries() { } } -bool ProcessElfCore::GetModuleSpec(const FileSpec &module_file_spec, - const ArchSpec &arch, - ModuleSpec &module_spec) { - module_spec.Clear(); - for (NT_FILE_Entry &entry : m_nt_file_entries) { - if (module_file_spec.GetPath() == entry.path) { - module_spec.GetFileSpec() = module_file_spec; - module_spec.GetArchitecture() = arch; - module_spec.GetUUID() = entry.uuid; - return true; - } - } - - return false; +UUID ProcessElfCore::FindModuleUUID(const llvm::StringRef path) { + // Returns the gnu uuid from matched NT_FILE entry + for (NT_FILE_Entry &entry : m_nt_file_entries) + if (path == entry.path) + return entry.uuid; + return UUID(); } lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() { diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h index a7b1822ccf01f..a91c04a277f60 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h @@ -163,9 +163,7 @@ class ProcessElfCore : public lldb_private::PostMortemProcess { // Populate gnu uuid for each NT_FILE entry void UpdateBuildIdForNTFileEntries(); - bool GetModuleSpec(const lldb_private::FileSpec &module_file_spec, - const lldb_private::ArchSpec &arch, - lldb_private::ModuleSpec &module_spec) override; + lldb_private::UUID FindModuleUUID(const llvm::StringRef path) override; // Returns the value of certain type of note of a given start address lldb_private::UUID 
FindBuidIdInCoreMemory(lldb::addr_t address); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index c18edd10b9681..30c890d6d0138 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -137,9 +137,19 @@ void DWARFIndex::GetTypesWithQuery( bool DWARFIndex::ProcessTypeDIEMatchQuery( TypeQuery &query, DWARFDIE die, llvm::function_ref callback) { - // Nothing to match from query - if (query.GetContextRef().size() <= 1) + // Check the language, but only if we have a language filter. + if (query.HasLanguage() && + !query.LanguageMatches(SymbolFileDWARF::GetLanguageFamily(*die.GetCU()))) + return true; // Keep iterating over index types, language mismatch. + + // Since mangled names are unique, we only need to check if the names are + // the same. + if (query.GetSearchByMangledName()) { + if (die.GetMangledName(/*substitute_name_allowed=*/false) != + query.GetTypeBasename().GetStringRef()) + return true; // Keep iterating over index types, mangled name mismatch. return callback(die); + } std::vector die_context; if (query.GetModuleSearch()) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 8ce0db4588a46..c900f330b481b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2726,39 +2726,8 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { TypeQuery query_full(query); bool have_index_match = false; m_index->GetTypesWithQuery(query_full, [&](DWARFDIE die) { - // Check the language, but only if we have a language filter. - if (query.HasLanguage()) { - if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU()))) - return true; // Keep iterating over index types, language mismatch. 
- } - - // Since mangled names are unique, we only need to check if the names are - // the same. - if (query.GetSearchByMangledName()) { - if (die.GetMangledName(/*substitute_name_allowed=*/false) != - query.GetTypeBasename().GetStringRef()) - return true; // Keep iterating over index types, mangled name mismatch. - if (Type *matching_type = ResolveType(die, true, true)) { - results.InsertUnique(matching_type->shared_from_this()); - return !results.Done(query); // Keep iterating if we aren't done. - } - return true; // Keep iterating over index types, weren't able to resolve - // this type - } - - // Check the context matches - std::vector die_context; - if (query.GetModuleSearch()) - die_context = die.GetDeclContext(); - else - die_context = die.GetTypeLookupContext(); - assert(!die_context.empty()); - if (!query.ContextMatches(die_context)) - return true; // Keep iterating over index types, context mismatch. - - // Try to resolve the type. if (Type *matching_type = ResolveType(die, true, true)) { - if (matching_type->IsTemplateType()) { + if (!query.GetSearchByMangledName() && matching_type->IsTemplateType()) { // We have to watch out for case where we lookup a type by basename and // it matches a template with simple template names. Like looking up // "Foo" and if we have simple template names then we will match @@ -2790,7 +2759,7 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { // With -gsimple-template-names, a templated type's DW_AT_name will not // contain the template parameters. Try again stripping '<' and anything // after, filtering out entries with template parameters that don't match. - if (!have_index_match) { + if (!have_index_match && !query.GetSearchByMangledName()) { // Create a type matcher with a compiler context that is tuned for // -gsimple-template-names. 
We will use this for the index lookup and the // context matching, but will use the original "match" to insert matches @@ -2804,23 +2773,6 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { // Copy our match's context and update the basename we are looking for // so we can use this only to compare the context correctly. m_index->GetTypesWithQuery(query_simple, [&](DWARFDIE die) { - // Check the language, but only if we have a language filter. - if (query.HasLanguage()) { - if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU()))) - return true; // Keep iterating over index types, language mismatch. - } - - // Check the context matches - std::vector die_context; - if (query.GetModuleSearch()) - die_context = die.GetDeclContext(); - else - die_context = die.GetTypeLookupContext(); - assert(!die_context.empty()); - if (!query_simple.ContextMatches(die_context)) - return true; // Keep iterating over index types, context mismatch. - - // Try to resolve the type. 
if (Type *matching_type = ResolveType(die, true, true)) { ConstString name = matching_type->GetQualifiedName(); // We have found a type that still might not match due to template diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index c0416b4d06815..0f77b2e28004e 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -379,18 +379,17 @@ uint32_t SymbolFileNativePDB::CalculateNumCompileUnits() { return count; } -Block &SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { +Block *SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { CompilandIndexItem *cii = m_index->compilands().GetCompiland(block_id.modi); CVSymbol sym = cii->m_debug_stream.readSymbolAtOffset(block_id.offset); CompUnitSP comp_unit = GetOrCreateCompileUnit(*cii); lldb::user_id_t opaque_block_uid = toOpaqueUid(block_id); - BlockSP child_block = std::make_shared(opaque_block_uid); auto ts_or_err = GetTypeSystemForLanguage(comp_unit->GetLanguage()); if (auto err = ts_or_err.takeError()) - return *child_block; + return nullptr; auto ts = *ts_or_err; if (!ts) - return *child_block; + return nullptr; PdbAstBuilder* ast_builder = ts->GetNativePDBParser(); switch (sym.kind()) { @@ -403,7 +402,7 @@ Block &SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { Block &block = func->GetBlock(false); if (block.GetNumRanges() == 0) block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); - return block; + return █ } break; } @@ -416,13 +415,16 @@ Block &SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { cantFail(SymbolDeserializer::deserializeAs(sym, block)); lldbassert(block.Parent != 0); PdbCompilandSymId parent_id(block_id.modi, block.Parent); - Block &parent_block = GetOrCreateBlock(parent_id); - Function *func = parent_block.CalculateSymbolContextFunction(); + Block 
*parent_block = GetOrCreateBlock(parent_id); + if (!parent_block) + return nullptr; + Function *func = parent_block->CalculateSymbolContextFunction(); lldbassert(func); lldb::addr_t block_base = m_index->MakeVirtualAddress(block.Segment, block.CodeOffset); lldb::addr_t func_base = func->GetAddressRange().GetBaseAddress().GetFileAddress(); + BlockSP child_block = std::make_shared(opaque_block_uid); if (block_base >= func_base) child_block->AddRange(Block::Range(block_base - func_base, block.CodeSize)); else { @@ -435,7 +437,7 @@ Block &SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { block_id.modi, block_id.offset, block_base, block_base + block.CodeSize, func_base); } - parent_block.AddChild(child_block); + parent_block->AddChild(child_block); ast_builder->GetOrCreateBlockDecl(block_id); m_blocks.insert({opaque_block_uid, child_block}); break; @@ -445,8 +447,11 @@ Block &SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { comp_unit->GetLineTable(); std::shared_ptr inline_site = m_inline_sites[opaque_block_uid]; - Block &parent_block = GetOrCreateBlock(inline_site->parent_id); - parent_block.AddChild(child_block); + Block *parent_block = GetOrCreateBlock(inline_site->parent_id); + if (!parent_block) + return nullptr; + BlockSP child_block = std::make_shared(opaque_block_uid); + parent_block->AddChild(child_block); ast_builder->GetOrCreateInlinedFunctionDecl(block_id); // Copy ranges from InlineSite to Block. 
for (size_t i = 0; i < inline_site->ranges.GetSize(); ++i) { @@ -469,7 +474,7 @@ Block &SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { lldbassert(false && "Symbol is not a block!"); } - return *child_block; + return nullptr; } lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id, @@ -997,10 +1002,10 @@ SymbolFileNativePDB::GetOrCreateCompileUnit(const CompilandIndexItem &cci) { return emplace_result.first->second; } -Block &SymbolFileNativePDB::GetOrCreateBlock(PdbCompilandSymId block_id) { +Block *SymbolFileNativePDB::GetOrCreateBlock(PdbCompilandSymId block_id) { auto iter = m_blocks.find(toOpaqueUid(block_id)); if (iter != m_blocks.end()) - return *iter->second; + return iter->second.get(); return CreateBlock(block_id); } @@ -1124,14 +1129,16 @@ uint32_t SymbolFileNativePDB::ResolveSymbolContext( } if (type == PDB_SymType::Block) { - Block &block = GetOrCreateBlock(csid); - sc.function = block.CalculateSymbolContextFunction(); + Block *block = GetOrCreateBlock(csid); + if (!block) + continue; + sc.function = block->CalculateSymbolContextFunction(); if (sc.function) { sc.function->GetBlock(true); addr_t func_base = sc.function->GetAddressRange().GetBaseAddress().GetFileAddress(); addr_t offset = file_addr - func_base; - sc.block = block.FindInnermostBlockByOffset(offset); + sc.block = block->FindInnermostBlockByOffset(offset); } } if (sc.function) @@ -1837,12 +1844,16 @@ VariableSP SymbolFileNativePDB::CreateLocalVariable(PdbCompilandSymId scope_id, PdbCompilandSymId var_id, bool is_param) { ModuleSP module = GetObjectFile()->GetModule(); - Block &block = GetOrCreateBlock(scope_id); + Block *block = GetOrCreateBlock(scope_id); + if (!block) + return nullptr; + // Get function block. 
- Block *func_block = █ + Block *func_block = block; while (func_block->GetParent()) { func_block = func_block->GetParent(); } + Address addr; func_block->GetStartAddress(addr); VariableInfo var_info = @@ -1875,8 +1886,8 @@ VariableSP SymbolFileNativePDB::CreateLocalVariable(PdbCompilandSymId scope_id, bool static_member = false; Variable::RangeList scope_ranges; VariableSP var_sp = std::make_shared( - toOpaqueUid(var_id), name.c_str(), name.c_str(), sftype, var_scope, - &block, scope_ranges, &decl, var_info.location, external, artificial, + toOpaqueUid(var_id), name.c_str(), name.c_str(), sftype, var_scope, block, + scope_ranges, &decl, var_info.location, external, artificial, location_is_constant_data, static_member); if (!is_param) { auto ts_or_err = GetTypeSystemForLanguage(comp_unit_sp->GetLanguage()); @@ -1935,7 +1946,9 @@ TypeSP SymbolFileNativePDB::GetOrCreateTypedef(PdbGlobalSymId id) { } size_t SymbolFileNativePDB::ParseVariablesForBlock(PdbCompilandSymId block_id) { - Block &block = GetOrCreateBlock(block_id); + Block *block = GetOrCreateBlock(block_id); + if (!block) + return 0; size_t count = 0; @@ -1977,10 +1990,10 @@ size_t SymbolFileNativePDB::ParseVariablesForBlock(PdbCompilandSymId block_id) { return 0; } - VariableListSP variables = block.GetBlockVariableList(false); + VariableListSP variables = block->GetBlockVariableList(false); if (!variables) { variables = std::make_shared(); - block.SetVariableList(variables); + block->SetVariableList(variables); } CVSymbolArray syms = limitSymbolArrayToScope( @@ -2027,7 +2040,7 @@ size_t SymbolFileNativePDB::ParseVariablesForBlock(PdbCompilandSymId block_id) { // Pass false for set_children, since we call this recursively so that the // children will call this for themselves. 
- block.SetDidParseVariables(true, false); + block->SetDidParseVariables(true, false); return count; } diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index 669c44aa131ed..b0e78a243a3c2 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -226,7 +226,7 @@ class SymbolFileNativePDB : public SymbolFileCommon { lldb::TypeSP GetOrCreateType(PdbTypeSymId type_id); lldb::TypeSP GetOrCreateType(llvm::codeview::TypeIndex ti); lldb::VariableSP GetOrCreateGlobalVariable(PdbGlobalSymId var_id); - Block &GetOrCreateBlock(PdbCompilandSymId block_id); + Block *GetOrCreateBlock(PdbCompilandSymId block_id); lldb::VariableSP GetOrCreateLocalVariable(PdbCompilandSymId scope_id, PdbCompilandSymId var_id, bool is_param); @@ -234,7 +234,7 @@ class SymbolFileNativePDB : public SymbolFileCommon { lldb::FunctionSP CreateFunction(PdbCompilandSymId func_id, CompileUnit &comp_unit); - Block &CreateBlock(PdbCompilandSymId block_id); + Block *CreateBlock(PdbCompilandSymId block_id); lldb::VariableSP CreateLocalVariable(PdbCompilandSymId scope_id, PdbCompilandSymId var_id, bool is_param); lldb::TypeSP CreateTypedef(PdbGlobalSymId id); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 9125ceca74a00..db33525978a16 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -6080,6 +6080,10 @@ bool Process::GetProcessInfo(ProcessInstanceInfo &info) { return platform_sp->GetProcessInfo(GetID(), info); } +lldb_private::UUID Process::FindModuleUUID(const llvm::StringRef path) { + return lldb_private::UUID(); +} + ThreadCollectionSP Process::GetHistoryThreads(lldb::addr_t addr) { ThreadCollectionSP threads; diff --git a/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py b/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py 
index f1c23a58d1f48..dbb9576ed4d51 100644 --- a/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py +++ b/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py @@ -59,7 +59,7 @@ def test_source_line_breakpoint(self): lldbutil.check_breakpoint(self, bpno=1, expected_hit_count=1) thread = process.GetSelectedThread() - stack_frames = lldbutil.get_stack_frames(thread) + stack_frames = thread.frames self.assertGreater(len(stack_frames), 2) leaf_frame = stack_frames[0] @@ -97,7 +97,7 @@ def test_symbolic_breakpoint(self): lldbutil.check_breakpoint(self, bpno=1, expected_hit_count=1) thread = process.GetSelectedThread() - stack_frames = lldbutil.get_stack_frames(thread) + stack_frames = thread.frames self.assertGreater(len(stack_frames), 2) leaf_frame = stack_frames[0] diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 71dd4c6b75b63..9744aa1aa43be 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -121,8 +121,8 @@ david.trevelyan@gmail.com (email), [davidtrevelyan](https://github.com/davidtrev #### Parts of code generator not covered by someone else -Evan Cheng \ -evan.cheng@apple.com (email) +Matt Arsenault \ +Matthew.Arsenault@amd.com, arsenm2@gmail.com (email), [arsenm](https://github.com/arsenm) (GitHub) #### SelectionDAG @@ -180,8 +180,16 @@ marksl@synopsys.com (email), [markschimmel](https://github.com/markschimmel) (Gi #### ARM backend -Renato Golin \ -rengolin@systemcall.eu (email), [rengolin](https://github.com/rengolin) (GitHub) +David Green \ +david.green@arm.com (email), [davemgreen](https://github.com/davemgreen) (GitHub) \ +Oliver Stannard (Especially assembly/dissassembly) \ +oliver.stannard@arm.com (email), [ostannard](https://github.com/ostannard) (GitHub) \ +Nashe Mncube \ +nashe.mncube@arm.com (email), [nasherm](https://github.com/nasherm) (GitHub) \ +Peter Smith (Anything ABI) \ +peter.smith@arm.com (email), [smithp35](https://github.com/smithp35) (GitHub) \ +Ties Stuij (GlobalISel and early arch 
support) \ +ties.stuij@arm.com (email), [stuij](https://github.com/stuij) (GitHub) #### AVR backend @@ -406,11 +414,6 @@ echristo@gmail.com (email), [echristo](https://github.com/echristo) (GitHub) Anton Korobeynikov \ anton@korobeynikov.info (email), [asl](https://github.com/asl) (GitHub) -#### ARM EABI - -Anton Korobeynikov \ -anton@korobeynikov.info (email), [asl](https://github.com/asl) (GitHub) - #### LLVM Buildbot Galina Kistanova \ @@ -469,6 +472,9 @@ sabre@nondot.org (email), [lattner](https://github.com/lattner) (GitHub), clattn ### Inactive or former component maintainers Justin Bogner (mail@justinbogner.com, [bogner](https://github.com/bogner)) -- SelectionDAG \ +Evan Cheng (evan.cheng@apple.com) -- Parts of code generator not covered by someone else \ +Renato Golin (rengolin@systemcall.eu, [rengolin](https://github.com/rengolin)) -- ARM backend \ +Anton Korobeynikov (anton@korobeynikov.info, [asl](https://github.com/asl)) -- ARM EABI \ Hans Wennborg (hans@chromium.org, [zmodem](https://github.com/zmodem)) -- Release management \ ### Former maintainers of removed components diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 161363e0dd6bc..411a1209ef947 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1407,6 +1407,19 @@ The AMDGPU backend implements the following LLVM IR intrinsics. llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4 Emit `v_mfma_scale_f32_32x32x64_f8f6f4` + llvm.amdgcn.permlane16.swap Provide direct access to `v_permlane16_swap_b32` instruction on supported targets. + Swaps the values across lanes of first 2 operands. Odd rows of the first operand are + swapped with even rows of the second operand (one row is 16 lanes). + Returns a pair for the swapped registers. The first element of the return corresponds + to the swapped element of the first argument. + + + llvm.amdgcn.permlane32.swap Provide direct access to `v_permlane32_swap_b32` instruction on supported targets. 
+ Swaps the values across lanes of first 2 operands. Rows 2 and 3 of the first operand are + swapped with rows 0 and 1 of the second operand (one row is 16 lanes). + Returns a pair for the swapped registers. The first element of the return + corresponds to the swapped element of the first argument. + ============================================== ========================================================== .. TODO:: diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index ad8ede9c59fbf..dcec9611d8aaa 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -162,9 +162,10 @@ the subsequent ``dx.op.annotateHandle`` operation in. Note that we don't have an analogue for `dx.op.createHandle`_, since ``dx.op.createHandleFromBinding`` subsumes it. -For simplicity of lowering, we match DXIL in using an index from the beginning -of the binding space rather than an index from the lower bound of the binding -itself. +We diverge from DXIL and index from the beginning of the binding rather than +indexing from the beginning of the binding space. This matches the semantics +more clearly and avoids a non-obvious invariant in what constitutes valid +arguments. .. _dx.op.createHandle: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#resource-handles @@ -194,7 +195,7 @@ itself. * - ``%index`` - 4 - ``i32`` - - Index from the beginning of the binding space to access. + - Index from the beginning of the binding. * - ``%non-uniform`` - 5 - i1 @@ -233,6 +234,12 @@ Examples: @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( i32 1, i32 8, i32 1, i32 0, i1 false) + ; RWBuffer Global[3] : register(u6, space5) + ; RWBuffer Buf = Global[2]; + %buf = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0( + i32 5, i32 6, i32 3, i32 2, i1 false) + .. 
list-table:: ``@llvm.dx.handle.fromHeap`` :header-rows: 1 diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 9f4c90ba82a41..be5b6e2e215e6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -21522,9 +21522,9 @@ This is an overloaded intrinsic. :: - declare <16 x i32> @llvm.vp.abs.v16i32 (<16 x i32> , <16 x i1> , i32 , i1 ) - declare @llvm.vp.abs.nxv4i32 ( , , i32 , i1 ) - declare <256 x i64> @llvm.vp.abs.v256i64 (<256 x i64> , <256 x i1> , i32 , i1 ) + declare <16 x i32> @llvm.vp.abs.v16i32 (<16 x i32> , i1 , <16 x i1> , i32 ) + declare @llvm.vp.abs.nxv4i32 ( , i1 , , i32 ) + declare <256 x i64> @llvm.vp.abs.v256i64 (<256 x i64> , i1 , <256 x i1> , i32 ) Overview: """"""""" @@ -21536,12 +21536,12 @@ Arguments: """""""""" The first argument and the result have the same vector of integer type. The -second argument is the vector mask and has the same number of elements as the -result vector type. The third argument is the explicit vector length of the -operation. The fourth argument must be a constant and is a flag to indicate -whether the result value of the '``llvm.vp.abs``' intrinsic is a -:ref:`poison value ` if the first argument is statically or -dynamically an ``INT_MIN`` value. +second argument must be a constant and is a flag to indicate whether the result +value of the '``llvm.vp.abs``' intrinsic is a :ref:`poison value ` +if the first argument is statically or dynamically an ``INT_MIN`` value. The +third argument is the vector mask and has the same number of elements as the +result vector type. The fourth argument is the explicit vector length of the +operation. Semantics: """""""""" @@ -21554,7 +21554,7 @@ Examples: .. 
code-block:: llvm - %r = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false) + %r = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a, i1 false, <4 x i1> %mask, i32 %evl) ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r %t = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false) @@ -25260,9 +25260,9 @@ This is an overloaded intrinsic. :: - declare <16 x i32> @llvm.vp.ctlz.v16i32 (<16 x i32> , <16 x i1> , i32 , i1 ) - declare @llvm.vp.ctlz.nxv4i32 ( , , i32 , i1 ) - declare <256 x i64> @llvm.vp.ctlz.v256i64 (<256 x i64> , <256 x i1> , i32 , i1 ) + declare <16 x i32> @llvm.vp.ctlz.v16i32 (<16 x i32> , i1 , <16 x i1> , i32 ) + declare @llvm.vp.ctlz.nxv4i32 ( , i1 , , i32 ) + declare <256 x i64> @llvm.vp.ctlz.v256i64 (<256 x i64> , i1 , <256 x i1> , i32 ) Overview: """"""""" @@ -25274,11 +25274,11 @@ Arguments: """""""""" The first argument and the result have the same vector of integer type. The -second argument is the vector mask and has the same number of elements as the -result vector type. The third argument is the explicit vector length of the -operation. The fourth argument is a constant flag that indicates whether the -intrinsic returns a valid result if the first argument is zero. If the first -argument is zero and the fourth argument is true, the result is poison. +second argument is a constant flag that indicates whether the intrinsic returns +a valid result if the first argument is zero. The third argument is the vector +mask and has the same number of elements as the result vector type. The fourth +argument is the explicit vector length of the operation. If the first argument +is zero and the second argument is true, the result is poison. Semantics: """""""""" @@ -25291,7 +25291,7 @@ Examples: .. 
code-block:: llvm - %r = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false) + %r = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %a, i1 false, <4 x i1> %mask, i32 %evl) ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r %t = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) @@ -25309,9 +25309,9 @@ This is an overloaded intrinsic. :: - declare <16 x i32> @llvm.vp.cttz.v16i32 (<16 x i32> , <16 x i1> , i32 , i1 ) - declare @llvm.vp.cttz.nxv4i32 ( , , i32 , i1 ) - declare <256 x i64> @llvm.vp.cttz.v256i64 (<256 x i64> , <256 x i1> , i32 , i1 ) + declare <16 x i32> @llvm.vp.cttz.v16i32 (<16 x i32> , i1 , <16 x i1> , i32 ) + declare @llvm.vp.cttz.nxv4i32 ( , i1 , , i32 ) + declare <256 x i64> @llvm.vp.cttz.v256i64 (<256 x i64> , i1 , <256 x i1> , i32 ) Overview: """"""""" @@ -25323,11 +25323,11 @@ Arguments: """""""""" The first argument and the result have the same vector of integer type. The -second argument is the vector mask and has the same number of elements as the -result vector type. The third argument is the explicit vector length of the -operation. The fourth argument is a constant flag that indicates whether the -intrinsic returns a valid result if the first argument is zero. If the first -argument is zero and the fourth argument is true, the result is poison. +second argument is a constant flag that indicates whether the intrinsic +returns a valid result if the first argument is zero. The third argument is +the vector mask and has the same number of elements as the result vector type. +The fourth argument is the explicit vector length of the operation. If the +first argument is zero and the second argument is true, the result is poison. Semantics: """""""""" @@ -25340,7 +25340,7 @@ Examples: .. 
code-block:: llvm - %r = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false) + %r = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %a, i1 false, <4 x i1> %mask, i32 %evl) ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r %t = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) diff --git a/llvm/include/llvm/ADT/SmallVectorExtras.h b/llvm/include/llvm/ADT/SmallVectorExtras.h index eea91ca81ca61..061293fae0830 100644 --- a/llvm/include/llvm/ADT/SmallVectorExtras.h +++ b/llvm/include/llvm/ADT/SmallVectorExtras.h @@ -19,12 +19,28 @@ namespace llvm { +/// Filter a range to a SmallVector with the element types deduced. +template +auto filter_to_vector(ContainerTy &&C, PredicateFn &&Pred) { + return to_vector(make_filter_range(std::forward(C), + std::forward(Pred))); +} + +/// Filter a range to a SmallVector with the element types deduced. +template +auto filter_to_vector(ContainerTy &&C, PredicateFn &&Pred) { + return to_vector(make_filter_range(std::forward(C), + std::forward(Pred))); +} + /// Map a range to a SmallVector with element types deduced from the mapping. template auto map_to_vector(ContainerTy &&C, FuncTy &&F) { return to_vector( map_range(std::forward(C), std::forward(F))); } + +/// Map a range to a SmallVector with element types deduced from the mapping. 
template auto map_to_vector(ContainerTy &&C, FuncTy &&F) { return to_vector( diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/x86_64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/x86_64.def index 0c026cf443361..a93d92870cf32 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/x86_64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/x86_64.def @@ -46,3 +46,4 @@ ELF_RELOC(R_X86_64_REX_GOTPCRELX, 42) ELF_RELOC(R_X86_64_CODE_4_GOTPCRELX, 43) ELF_RELOC(R_X86_64_CODE_4_GOTTPOFF, 44) ELF_RELOC(R_X86_64_CODE_4_GOTPC32_TLSDESC, 45) +ELF_RELOC(R_X86_64_CODE_6_GOTTPOFF, 50) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d3d68ff1c6ed2..98cbb4886642b 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -277,6 +277,20 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); } + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // Inline a callee if its target-features are a subset of the callers + // target-features. 
+ return (CallerBits & CalleeBits) == CalleeBits; + } + bool hasBranchDivergence(const Function *F = nullptr) { return false; } bool isSourceOfDivergence(const Value *V) { return false; } @@ -1618,6 +1632,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind); } + if (VPCastIntrinsic::isVPCast(ICA.getID())) { + return thisT()->getCastInstrCost( + *FOp, ICA.getReturnType(), ICA.getArgTypes()[0], + TTI::CastContextHint::None, CostKind); + } + if (VPCmpIntrinsic::isVPCmp(ICA.getID())) { + // We can only handle vp_cmp intrinsics with underlying instructions. + if (ICA.getInst()) { + assert(FOp); + auto *UI = cast(ICA.getInst()); + return thisT()->getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0], + ICA.getReturnType(), + UI->getPredicate(), CostKind); + } + } } std::optional FID = diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index f682b20816d57..2384b22c05266 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -378,6 +378,8 @@ class LegalizerHelper { LLT CastTy); LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy); + LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy); LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy); LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 8b1c11a6f4130..b681a0708db4b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -292,6 +292,9 @@ LegalityPredicate isPointer(unsigned TypeIdx); /// True iff the specified type index is a pointer with the specified address /// space. 
LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace); +/// True iff the specified type index is a vector of pointers (with any address +/// space). +LegalityPredicate isPointerVector(unsigned TypeIdx); /// True if the type index is a vector with element type \p EltTy LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy); diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 07b59b241d9f9..408adcd330b84 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -159,6 +159,12 @@ class TargetInstrInfo : public MCInstrInfo { return true; } + /// For a "cheap" instruction which doesn't enable additional sinking, + /// should MachineSink break a critical edge to sink it anyways? + virtual bool shouldBreakCriticalEdgeToSink(MachineInstr &MI) const { + return false; + } + protected: /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is /// set, this hook lets the target specify whether the instruction is actually diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h index 87e876273c4b9..97ff7e8407024 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h @@ -69,6 +69,7 @@ template struct DWARFTypePrinter { case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_namespace: case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_typedef: return true; default: break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 75e73bedd9348..bd7fb2361aaeb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -769,6 +769,7 @@ def OMP_Flush : Directive<"flush"> { // OMPKinds.def. 
VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_None; let category = CA_Executable; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f9a4f1899cc60..79ad9dbb67430 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -594,6 +594,21 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; +class AMDGPUCvtScaleF32Intrinsic : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_"#name>; + +class AMDGPUCvtScaleF32ToFP6BF6Intrinsic : DefaultAttrsIntrinsic< + [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_"#name>; + +def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic; +def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic; +def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic; +def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic; +def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic; +def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic; + def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] >, ClangBuiltin<"__builtin_amdgcn_prng_b32">; @@ -1437,7 +1452,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic; -// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+. +// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+. 
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic; class AMDGPUStructBufferAtomic : Intrinsic < @@ -1512,7 +1527,7 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic< ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -// gfx908 intrinsic +// gfx908 intrinsic. Supports v2bf16 on gfx12+ and gfx950 def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic; def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic; @@ -2726,6 +2741,10 @@ class AMDGPULoadIntrinsic: def int_amdgcn_global_load_tr_b64 : AMDGPULoadIntrinsic; def int_amdgcn_global_load_tr_b128 : AMDGPULoadIntrinsic; +def int_amdgcn_ds_read_tr4_b64 : AMDGPULoadIntrinsic; +def int_amdgcn_ds_read_tr6_b96 : AMDGPULoadIntrinsic; +def int_amdgcn_ds_read_tr8_b64 : AMDGPULoadIntrinsic; +def int_amdgcn_ds_read_tr16_b64 : AMDGPULoadIntrinsic; // i32 @llvm.amdgcn.wave.id() def int_amdgcn_wave_id : @@ -2801,6 +2820,24 @@ def int_amdgcn_fdot2_f32_bf16 : [IntrNoMem, IntrSpeculatable, ImmArg>] >; +// f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) +// %r = %a[0] * %b[0] + %a[1] * %b[1] + c +// TODO: This actually is similar to llvm.amdgcn.fdot2 intrinsics which produces +// v_dot2c_f32_f16 on gfx940. Maybe we can consolidate these. 
+ +def int_amdgcn_fdot2c_f32_bf16 : + ClangBuiltin<"__builtin_amdgcn_fdot2c_f32_bf16">, + DefaultAttrsIntrinsic< + [llvm_float_ty], // %r + [ + llvm_v2bf16_ty, // %a + llvm_v2bf16_ty, // %b + llvm_float_ty, // %c + llvm_i1_ty // %clamp + ], + [IntrNoMem, IntrSpeculatable, ImmArg>] + >; + // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_sdot2 : @@ -3166,6 +3203,30 @@ def int_amdgcn_smfmac_f32_32x32x64_fp8_bf8 : AMDGPUMSmfmacIntrinsic; } +// { vdst_new, vsrc_new } llvm.amdgcn.permlane16.swap +def int_amdgcn_permlane16_swap : + Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, + llvm_i1_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; + +// { vdst_new, vsrc_new } llvm.amdgcn.permlane32.swap +def int_amdgcn_permlane32_swap : + Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, + llvm_i1_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.ashr_pk_i8_i32 int vdst, int src0, int src1 int src2 +def int_amdgcn_ashr_pk_i8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_i8_i32">, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable]>; + +// llvm.amdgcn.ashr_pk_u8_i32 int vdst, int src0, int src1 int src2 +def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. 
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index dad60a2535cf4..bf49ec6f6c649 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -37,7 +37,7 @@ def int_dx_typedBufferStore : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty], [IntrWriteMem]>; -def int_dx_updateCounter +def int_dx_bufferUpdateCounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], [IntrInaccessibleMemOrArgMemOnly]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index e115708583286..17b70062e58fa 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -106,9 +106,14 @@ let TargetPrefix = "spv" in { [llvm_any_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem]>; + def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; + def int_spv_bufferUpdateCounter + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], + [IntrInaccessibleMemOrArgMemOnly]>; + // Read a value from the image buffer. It does not translate directly to a // single OpImageRead because the result type is not necessarily a 4 element // vector. diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 0d6df72790632..fc4c0124d00b8 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1716,7 +1716,8 @@ template struct TwoOps_match { }; /// Matches instructions with Opcode and three operands. 
-template +template struct ThreeOps_match { T0 Op1; T1 Op2; @@ -1728,8 +1729,12 @@ struct ThreeOps_match { template bool match(OpTy *V) { if (V->getValueID() == Value::InstructionVal + Opcode) { auto *I = cast(V); - return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && - Op3.match(I->getOperand(2)); + if (!Op1.match(I->getOperand(0))) + return false; + if (Op2.match(I->getOperand(1)) && Op3.match(I->getOperand(2))) + return true; + return CommutableOp2Op3 && Op2.match(I->getOperand(2)) && + Op3.match(I->getOperand(1)); } return false; } @@ -1781,6 +1786,14 @@ m_SelectCst(const Cond &C) { return m_Select(C, m_ConstantInt(), m_ConstantInt()); } +/// Match Select(C, LHS, RHS) or Select(C, RHS, LHS) +template +inline ThreeOps_match +m_c_Select(const LHS &L, const RHS &R) { + return ThreeOps_match(m_Value(), L, R); +} + /// Matches FreezeInst. template inline OneOps_match m_Freeze(const OpTy &Op) { diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index f4f590a4edc9f..dd4d90864a08c 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -389,14 +389,6 @@ struct AllocationInfo { PortableMemInfoBlock Info; AllocationInfo() = default; - AllocationInfo( - const IndexedAllocationInfo &IndexedAI, - llvm::function_ref IdToFrameCallback) { - for (const FrameId &Id : IndexedAI.CallStack) { - CallStack.push_back(IdToFrameCallback(Id)); - } - Info = IndexedAI.Info; - } void printYAML(raw_ostream &OS) const { OS << " -\n"; @@ -1125,21 +1117,20 @@ template class CallStackRadixTreeBuilder { // Encode a call stack into RadixArray. Return the starting index within // RadixArray. 
- LinearCallStackId - encodeCallStack(const llvm::SmallVector *CallStack, - const llvm::SmallVector *Prev, - std::optional> - MemProfFrameIndexes); + LinearCallStackId encodeCallStack( + const llvm::SmallVector *CallStack, + const llvm::SmallVector *Prev, + const llvm::DenseMap *MemProfFrameIndexes); public: CallStackRadixTreeBuilder() = default; // Build a radix tree array. - void build(llvm::MapVector> - &&MemProfCallStackData, - std::optional> - MemProfFrameIndexes, - llvm::DenseMap &FrameHistogram); + void + build(llvm::MapVector> + &&MemProfCallStackData, + const llvm::DenseMap *MemProfFrameIndexes, + llvm::DenseMap &FrameHistogram); ArrayRef getRadixArray() const { return RadixArray; } @@ -1147,18 +1138,6 @@ template class CallStackRadixTreeBuilder { return std::move(CallStackPos); } }; - -// Verify that each CallStackId is computed with hashCallStack. This function -// is intended to help transition from CallStack to CSId in -// IndexedAllocationInfo. -void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record); - -// Verify that each CallStackId is computed with hashCallStack. This function -// is intended to help transition from CallStack to CSId in -// IndexedAllocationInfo. -void verifyFunctionProfileData( - const llvm::MapVector - &FunctionProfileData); } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 57ddcbf350060..caf404d95d865 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -46,26 +46,6 @@ class MemProfReader { return Iterator(this); } - // Return a const reference to the internal Id to Frame mappings. - LLVM_DEPRECATED("Use takeMemProfData instead", "takeMemProfData") - const llvm::DenseMap &getFrameMapping() const { - return IdToFrame; - } - - // Return a const reference to the internal Id to call stacks. 
- LLVM_DEPRECATED("Use takeMemProfData instead", "takeMemProfData") - const llvm::DenseMap> & - getCallStacks() const { - return CSIdToCallStack; - } - - // Return a const reference to the internal function profile data. - LLVM_DEPRECATED("Use takeMemProfData instead", "takeMemProfData") - const llvm::MapVector & - getProfileData() const { - return FunctionProfileData; - } - // Take the complete profile data. IndexedMemProfData takeMemProfData() { // TODO: Once we replace the three member variables, namely IdToFrame, @@ -116,24 +96,6 @@ class MemProfReader { MemProfReader() = default; virtual ~MemProfReader() = default; - // Initialize the MemProfReader with the frame mappings and profile contents. - LLVM_DEPRECATED("Construct MemProfReader with IndexedMemProfData", - "MemProfReader") - MemProfReader( - llvm::DenseMap FrameIdMap, - llvm::MapVector ProfData); - - // Initialize the MemProfReader with the frame mappings, call stack mappings, - // and profile contents. - LLVM_DEPRECATED("Construct MemProfReader with IndexedMemProfData", - "MemProfReader") - MemProfReader( - llvm::DenseMap FrameIdMap, - llvm::DenseMap> CSIdMap, - llvm::MapVector ProfData) - : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)), - FunctionProfileData(std::move(ProfData)) {} - // Initialize the MemProfReader with the given MemProf profile. MemProfReader(IndexedMemProfData MemProfData) { for (const auto &[FrameId, F] : MemProfData.Frames) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index e4485d997c34c..7bb6c3156c43e 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -118,6 +118,10 @@ def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc. 
def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2> ]>; +def SDTIntShiftPairOp : SDTypeProfile<2, 3, [ // shl_parts, sra_parts, srl_parts + SDTCisInt<0>, SDTCisSameAs<1, 0>, + SDTCisSameAs<2, 0>, SDTCisSameAs<3, 0>, SDTCisInt<4> +]>; def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; @@ -422,6 +426,9 @@ def sra : SDNode<"ISD::SRA" , SDTIntShiftOp>; def shl : SDNode<"ISD::SHL" , SDTIntShiftOp>; def rotl : SDNode<"ISD::ROTL" , SDTIntShiftOp>; def rotr : SDNode<"ISD::ROTR" , SDTIntShiftOp>; +def shl_parts : SDNode<"ISD::SHL_PARTS" , SDTIntShiftPairOp>; +def sra_parts : SDNode<"ISD::SRA_PARTS" , SDTIntShiftPairOp>; +def srl_parts : SDNode<"ISD::SRL_PARTS" , SDTIntShiftPairOp>; def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>; def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>; def and : SDNode<"ISD::AND" , SDTIntBinOp, diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index cd8594d670502..4028d5f4e2e1b 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -565,10 +565,7 @@ static APInt getSizeWithOverflow(const SizeOffsetAPInt &Data) { APInt Size = Data.Size; APInt Offset = Data.Offset; - assert(!Offset.isNegative() && - "size for a pointer before the allocated object is ambiguous"); - - if (Size.ult(Offset)) + if (Offset.isNegative() || Size.ult(Offset)) return APInt::getZero(Size.getBitWidth()); return Size - Offset; @@ -756,10 +753,14 @@ OffsetSpan ObjectSizeOffsetVisitor::computeImpl(Value *V) { } // We end up pointing on a location that's outside of the original object. - // This is UB, and we'd rather return an empty location then. if (ORT.knownBefore() && ORT.Before.isNegative()) { - ORT.Before = APInt::getZero(ORT.Before.getBitWidth()); - ORT.After = APInt::getZero(ORT.Before.getBitWidth()); + // This is UB, and we'd rather return an empty location then. 
+ if (Options.EvalMode == ObjectSizeOpts::Mode::Min || + Options.EvalMode == ObjectSizeOpts::Mode::Max) { + ORT.Before = APInt::getZero(ORT.Before.getBitWidth()); + ORT.After = APInt::getZero(ORT.Before.getBitWidth()); + } + // Otherwise it's fine, caller can handle negative offset. } return ORT; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8f22a50a5e024..63f4e34074e06 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4235,7 +4235,7 @@ static DenseMap writeMemoryProfileRadixTree( CallStackRadixTreeBuilder Builder; // We don't need a MemProfFrameIndexes map as we have already converted the // full stack id hash to a linear offset into the StackIds array. - Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt, + Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/nullptr, FrameHistogram); Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(), RadixAbbrev); diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp index fe7be0c0b6e7b..4afe77d78a4fe 100644 --- a/llvm/lib/CGData/StableFunctionMap.cpp +++ b/llvm/lib/CGData/StableFunctionMap.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CGData/StableFunctionMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -35,21 +36,30 @@ static cl::opt GlobalMergingMaxParams( cl::desc( "The maximum number of parameters allowed when merging functions."), cl::init(std::numeric_limits::max()), cl::Hidden); -static cl::opt GlobalMergingParamOverhead( +static cl::opt GlobalMergingSkipNoParams( + "global-merging-skip-no-params", + cl::desc("Skip merging functions with no parameters."), cl::init(true), + cl::Hidden); +static cl::opt GlobalMergingInstOverhead( + "global-merging-inst-overhead", + cl::desc("The 
overhead cost associated with each instruction when lowering " + "to machine instruction."), + cl::init(1.2), cl::Hidden); +static cl::opt GlobalMergingParamOverhead( "global-merging-param-overhead", cl::desc("The overhead cost associated with each parameter when merging " "functions."), - cl::init(2), cl::Hidden); -static cl::opt + cl::init(2.0), cl::Hidden); +static cl::opt GlobalMergingCallOverhead("global-merging-call-overhead", cl::desc("The overhead cost associated with each " "function call when merging functions."), - cl::init(1), cl::Hidden); -static cl::opt GlobalMergingExtraThreshold( + cl::init(1.0), cl::Hidden); +static cl::opt GlobalMergingExtraThreshold( "global-merging-extra-threshold", cl::desc("An additional cost threshold that must be exceeded for merging " "to be considered beneficial."), - cl::init(0), cl::Hidden); + cl::init(0.0), cl::Hidden); unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) { auto It = NameToId.find(Name); @@ -160,21 +170,32 @@ static bool isProfitable( if (InstCount < GlobalMergingMinInstrs) return false; - unsigned ParamCount = SFS[0]->IndexOperandHashMap->size(); - if (ParamCount > GlobalMergingMaxParams) - return false; - - unsigned Benefit = InstCount * (StableFunctionCount - 1); - unsigned Cost = - (GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) * - StableFunctionCount + - GlobalMergingExtraThreshold; + double Cost = 0.0; + SmallSet UniqueHashVals; + for (auto &SF : SFS) { + UniqueHashVals.clear(); + for (auto &[IndexPair, Hash] : *SF->IndexOperandHashMap) + UniqueHashVals.insert(Hash); + unsigned ParamCount = UniqueHashVals.size(); + if (ParamCount > GlobalMergingMaxParams) + return false; + // Theoretically, if ParamCount is 0, it results in identical code folding + // (ICF), which we can skip merging here since the linker already handles + // ICF. This pass would otherwise introduce unnecessary thunks that are + // merely direct jumps. 
However, enabling this could be beneficial depending + // on downstream passes, so we provide an option for it. + if (GlobalMergingSkipNoParams && ParamCount == 0) + return false; + Cost += ParamCount * GlobalMergingParamOverhead + GlobalMergingCallOverhead; + } + Cost += GlobalMergingExtraThreshold; + double Benefit = + InstCount * (StableFunctionCount - 1) * GlobalMergingInstOverhead; bool Result = Benefit > Cost; LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", " << "StableFunctionCount = " << StableFunctionCount << ", InstCount = " << InstCount - << ", ParamCount = " << ParamCount << ", Benefit = " << Benefit << ", Cost = " << Cost << ", Result = " << (Result ? "true" : "false") << "\n"); return Result; diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index f77b733c6c8f6..e7b9417de8c9f 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -596,10 +596,10 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, // Following attributes are completely benign as far as calling convention // goes, they shouldn't affect whether the call is a tail call. 
- for (const auto &Attr : - {Attribute::Alignment, Attribute::Dereferenceable, - Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) { + for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull, Attribute::NoUndef, + Attribute::Range, Attribute::NoFPClass}) { CallerAttrs.removeAttribute(Attr); CalleeAttrs.removeAttribute(Attr); } diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 4274c1347d648..5ca223852cbde 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -175,8 +175,7 @@ struct CachingVPExpander { VPIntrinsic &PI); /// Lower this VP int call to a unpredicated int call. - Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI, - unsigned UnpredicatedIntrinsicID); + Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI); /// Lower this VP fp call to a unpredicated fp call. 
Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI, @@ -287,33 +286,19 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, return NewBinOp; } -Value *CachingVPExpander::expandPredicationToIntCall( - IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) { - switch (UnpredicatedIntrinsicID) { - case Intrinsic::abs: - case Intrinsic::smax: - case Intrinsic::smin: - case Intrinsic::umax: - case Intrinsic::umin: { - Value *Op0 = VPI.getOperand(0); - Value *Op1 = VPI.getOperand(1); - Value *NewOp = Builder.CreateIntrinsic( - UnpredicatedIntrinsicID, {VPI.getType()}, {Op0, Op1}, - /*FMFSource=*/nullptr, VPI.getName()); - replaceOperation(*NewOp, VPI); - return NewOp; - } - case Intrinsic::bswap: - case Intrinsic::bitreverse: { - Value *Op = VPI.getOperand(0); - Value *NewOp = - Builder.CreateIntrinsic(UnpredicatedIntrinsicID, {VPI.getType()}, {Op}, - /*FMFSource=*/nullptr, VPI.getName()); - replaceOperation(*NewOp, VPI); - return NewOp; - } +Value *CachingVPExpander::expandPredicationToIntCall(IRBuilder<> &Builder, + VPIntrinsic &VPI) { + std::optional FID = VPI.getFunctionalIntrinsicID(); + if (!FID) + return nullptr; + SmallVector Argument; + for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) { + Argument.push_back(VPI.getOperand(i)); } - return nullptr; + Value *NewOp = Builder.CreateIntrinsic(FID.value(), {VPI.getType()}, Argument, + /*FMFSource=*/nullptr, VPI.getName()); + replaceOperation(*NewOp, VPI); + return NewOp; } Value *CachingVPExpander::expandPredicationToFPCall( @@ -323,20 +308,15 @@ Value *CachingVPExpander::expandPredicationToFPCall( switch (UnpredicatedIntrinsicID) { case Intrinsic::fabs: - case Intrinsic::sqrt: { - Value *Op0 = VPI.getOperand(0); - Value *NewOp = - Builder.CreateIntrinsic(UnpredicatedIntrinsicID, {VPI.getType()}, {Op0}, - /*FMFSource=*/nullptr, VPI.getName()); - replaceOperation(*NewOp, VPI); - return NewOp; - } + case Intrinsic::sqrt: case Intrinsic::maxnum: case 
Intrinsic::minnum: { - Value *Op0 = VPI.getOperand(0); - Value *Op1 = VPI.getOperand(1); + SmallVector Argument; + for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) { + Argument.push_back(VPI.getOperand(i)); + } Value *NewOp = Builder.CreateIntrinsic( - UnpredicatedIntrinsicID, {VPI.getType()}, {Op0, Op1}, + UnpredicatedIntrinsicID, {VPI.getType()}, Argument, /*FMFSource=*/nullptr, VPI.getName()); replaceOperation(*NewOp, VPI); return NewOp; @@ -438,56 +418,13 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder, VPIntrinsic &VPI) { - Value *CastOp = nullptr; - switch (VPI.getIntrinsicID()) { - default: - llvm_unreachable("Not a VP cast intrinsic"); - case Intrinsic::vp_sext: - CastOp = - Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_zext: - CastOp = - Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_trunc: - CastOp = - Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_inttoptr: - CastOp = - Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_ptrtoint: - CastOp = - Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_fptosi: - CastOp = - Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; + Intrinsic::ID VPID = VPI.getIntrinsicID(); + unsigned CastOpcode = VPIntrinsic::getFunctionalOpcodeForVP(VPID).value(); + assert(Instruction::isCast(CastOpcode)); + Value *CastOp = + Builder.CreateCast(Instruction::CastOps(CastOpcode), VPI.getOperand(0), + VPI.getType(), VPI.getName()); - case Intrinsic::vp_fptoui: - CastOp = - Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_sitofp: - CastOp = - Builder.CreateSIToFP(VPI.getOperand(0), 
VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_uitofp: - CastOp = - Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_fptrunc: - CastOp = - Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_fpext: - CastOp = - Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - } replaceOperation(*CastOp, VPI); return CastOp; } @@ -672,8 +609,7 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { case Intrinsic::vp_umin: case Intrinsic::vp_bswap: case Intrinsic::vp_bitreverse: - return expandPredicationToIntCall(Builder, VPI, - VPI.getFunctionalIntrinsicID().value()); + return expandPredicationToIntCall(Builder, VPI); case Intrinsic::vp_fabs: case Intrinsic::vp_sqrt: case Intrinsic::vp_maxnum: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index b7541effafe5c..30c2d089c3121 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -101,6 +101,12 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx, }; } +LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isPointerVector(); + }; +} + LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx, LLT EltTy) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 062dbbe904de3..abfca50a22bf1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -499,6 +499,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(LLRINT_F); } llvm_unreachable("Unknown libcall function"); +#undef RTLIBCASE_INT +#undef RTLIBCASE } /// True if an instruction 
is in tail position in its caller. Intended for @@ -3697,6 +3699,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +// This bitcasts a shuffle vector to a different type currently of the same +// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr +// will be used instead. +// +// <16 x p0> = G_CONCAT_VECTORS <4 x p0>, <4 x p0>, mask +// ===> +// <4 x s64> = G_PTRTOINT <4 x p0> +// <4 x s64> = G_PTRTOINT <4 x p0> +// <16 x s64> = G_CONCAT_VECTORS <4 x s64>, <4 x s64>, mask +// <16 x p0> = G_INTTOPTR <16 x s64> +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + auto ShuffleMI = cast(&MI); + LLT DstTy = MRI.getType(ShuffleMI->getReg(0)); + LLT SrcTy = MRI.getType(ShuffleMI->getReg(1)); + + // We currently only handle vectors of the same size. + if (TypeIdx != 0 || + CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() || + CastTy.getElementCount() != DstTy.getElementCount()) + return UnableToLegalize; + + LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType()); + + auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1)); + auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2)); + auto Shuf = + MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask()); + MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf); + + MI.eraseFromParent(); + return Legalized; +} + /// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy. 
/// /// = G_EXTRACT_SUBVECTOR , N @@ -4133,6 +4170,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { return bitcastInsertVectorElt(MI, TypeIdx, CastTy); case TargetOpcode::G_CONCAT_VECTORS: return bitcastConcatVector(MI, TypeIdx, CastTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return bitcastShuffleVector(MI, TypeIdx, CastTy); case TargetOpcode::G_EXTRACT_SUBVECTOR: return bitcastExtractSubvector(MI, TypeIdx, CastTy); case TargetOpcode::G_INSERT_SUBVECTOR: diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index d910e33ac40f6..be347006a81f9 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst, return buildCopy(Dst, Src); unsigned Opcode; - if (SrcTy.isPointer() && DstTy.isScalar()) + if (SrcTy.isPointerOrPointerVector()) Opcode = TargetOpcode::G_PTRTOINT; - else if (DstTy.isPointer() && SrcTy.isScalar()) + else if (DstTy.isPointerOrPointerVector()) Opcode = TargetOpcode::G_INTTOPTR; else { - assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet"); + assert(!SrcTy.isPointerOrPointerVector() && + !DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet"); Opcode = TargetOpcode::G_BITCAST; } diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index c8f1b98c9a18e..470582885fab0 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -405,12 +405,13 @@ static ParamLocsVecTy computeParamInfo( } ParamLocsVecTy ParamLocsVec; - for (auto &[HashSeq, Locs] : HashSeqToLocs) { + for (auto &[HashSeq, Locs] : HashSeqToLocs) ParamLocsVec.push_back(std::move(Locs)); - llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) { - return L[0] < R[0]; - }); - } + + llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const 
ParamLocs &R) { + return L[0] < R[0]; + }); + return ParamLocsVec; } diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index ba0015d3ddacb..c31454a8affda 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -128,6 +129,9 @@ static bool isColdBlock(const MachineBasicBlock &MBB, } bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { + // Do not split functions when -basic-block-sections=all is specified. + if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All) + return false; // We target functions with profile data. Static information in the form // of exception handling code may be split to cold if user passes the // mfs-split-ehcode flag. @@ -139,6 +143,14 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { if (!TII.isFunctionSafeToSplit(MF)) return false; + // Do not split functions with BasicBlockSections profiles as they will + // be split by the BasicBlockSections pass. + auto BBSectionsProfile = + getAnalysisIfAvailable(); + if (BBSectionsProfile != nullptr && + BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName())) + return false; + // Renumbering blocks here preserves the order of the blocks as // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort // blocks. 
Preserving the order of blocks is essential to retaining decisions @@ -201,6 +213,7 @@ void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addUsedIfAvailable(); } char MachineFunctionSplitter::ID = 0; diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 0def107f6306d..7d0bedab7cdab 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -958,7 +958,9 @@ bool MachineSinking::isWorthBreakingCriticalEdge( } } - return false; + // Let the target decide if it's worth breaking this + // critical edge for a "cheap" instruction. + return TII->shouldBreakCriticalEdgeToSink(MI); } bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI, diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 3910046a1652b..b08a93ae9a6d5 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -3033,7 +3033,11 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!MOP.getReg().isPhysical()) continue; - if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg)) + if (MOP.getReg() != Reg && + all_of(TRI->regunits(Reg), [&](const MCRegUnit RegUnit) { + return llvm::is_contained(TRI->regunits(MOP.getReg()), + RegUnit); + })) Bad = false; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1480bd98c685e..63536336e9622 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2330,10 +2330,10 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem); const int Precision = APFloat::semanticsPrecision(FltSem); - const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT); - const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT); 
+ const SDValue MaxExp = DAG.getSignedConstant(MaxExpVal, dl, ExpVT); + const SDValue MinExp = DAG.getSignedConstant(MinExpVal, dl, ExpVT); - const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT); + const SDValue DoubleMaxExp = DAG.getSignedConstant(2 * MaxExpVal, dl, ExpVT); const APFloat One(FltSem, "1.0"); APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven); @@ -2375,7 +2375,7 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW); SDValue ClampMinVal = - DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT); + DAG.getSignedConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT); SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal); SDValue IncN1 = DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW); @@ -2385,8 +2385,8 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal); SDValue ScaleDownTwice = DAG.getSetCC( - dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT), - ISD::SETULT); + dl, SetCCVT, N, + DAG.getSignedConstant(2 * MinExpVal + Precision, dl, ExpVT), ISD::SETULT); SDValue SelectN_Small = DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0); @@ -5277,7 +5277,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); Results.push_back(Tmp1); break; } @@ -5425,7 +5425,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, {Tmp3, Tmp1, Tmp2}); Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, - {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)}); + {Tmp1.getValue(1), Tmp1, + 
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); Results.push_back(Tmp1); Results.push_back(Tmp1.getValue(1)); break; @@ -5450,7 +5451,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, {Tmp4, Tmp1, Tmp2, Tmp3}); Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, - {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)}); + {Tmp4.getValue(1), Tmp4, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); Results.push_back(Tmp4); Results.push_back(Tmp4.getValue(1)); break; @@ -5472,13 +5474,27 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(isTrunc, dl, /*isTarget=*/true))); break; } + case ISD::STRICT_FLDEXP: { + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = Node->getOperand(2); + Tmp3 = DAG.getNode(ISD::STRICT_FLDEXP, dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, Tmp2}); + Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp3.getValue(1), Tmp3, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); + Results.push_back(Tmp4); + Results.push_back(Tmp4.getValue(1)); + break; + } case ISD::STRICT_FPOWI: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, {Node->getOperand(0), Node->getOperand(1)}); Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, {Tmp1.getValue(1), Tmp1, Node->getOperand(2)}); Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, - {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + {Tmp2.getValue(1), Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); Results.push_back(Tmp3); Results.push_back(Tmp3.getValue(1)); break; @@ -5562,7 +5578,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, {Tmp1.getValue(1), Tmp1}); Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, - {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, 
dl)}); + {Tmp2.getValue(1), Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); Results.push_back(Tmp3); Results.push_back(Tmp3.getValue(1)); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 648719bcabc37..7b9f544a5f9a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2577,6 +2577,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { bool IsPowI = N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI; + unsigned OpOffset = IsStrict ? 1 : 0; // The integer operand is the last operand in FPOWI (or FLDEXP) (so the result // and floating point operand is already type legalized). @@ -2584,8 +2585,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { : RTLIB::getLDEXP(N->getValueType(0)); if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { - SDValue Op = SExtPromotedInteger(N->getOperand(1)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); + SmallVector NewOps(N->ops()); + NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } // We can't just promote the exponent type in FPOWI, since we want to lower @@ -2594,7 +2596,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { // we rewrite to a libcall here directly, letting makeLibCall handle promotion // if the target accepts it according to shouldSignExtendTypeInLibCall. - unsigned OpOffset = IsStrict ? 1 : 0; // The exponent should fit in a sizeof(int) type for the libcall to be valid. 
assert(DAG.getLibInfo().getIntSize() == N->getOperand(1 + OpOffset).getValueType().getSizeInBits() && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3a8ec3c6105bc..7c5ed04830b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1460,7 +1460,7 @@ SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain, VT.bitsGT(Op.getValueType()) ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}) : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, - {Chain, Op, getIntPtrConstant(0, DL)}); + {Chain, Op, getIntPtrConstant(0, DL, /*isTarget=*/true)}); return std::pair(Res, SDValue(Res.getNode(), 1)); } @@ -7355,11 +7355,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return N1; break; case ISD::FP_ROUND: - assert(VT.isFloatingPoint() && - N1.getValueType().isFloatingPoint() && - VT.bitsLE(N1.getValueType()) && - N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && - "Invalid FP_ROUND!"); + assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && N2C && + (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && + N2.getOpcode() == ISD::TargetConstant && "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. 
break; case ISD::AssertSext: @@ -10542,7 +10541,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, assert(VTList.VTs[0].isFloatingPoint() && Ops[1].getValueType().isFloatingPoint() && VTList.VTs[0].bitsLT(Ops[1].getValueType()) && - isa(Ops[2]) && + Ops[2].getOpcode() == ISD::TargetConstant && (Ops[2]->getAsZExtVal() == 0 || Ops[2]->getAsZExtVal() == 1) && "Invalid STRICT_FP_ROUND!"); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8fbab337cab6f..bd4bcadb57d7a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -61,10 +61,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // the return. Ignore following attributes because they don't affect the // call sequence. AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs()); - for (const auto &Attr : - {Attribute::Alignment, Attribute::Dereferenceable, - Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) + for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull, Attribute::NoUndef, + Attribute::Range, Attribute::NoFPClass}) CallerAttrs.removeAttribute(Attr); if (CallerAttrs.hasAttributes()) diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index a6159a38753cf..d407e9f0871d4 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1235,13 +1235,13 @@ void TargetPassConfig::addMachinePasses() { addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::PassLast)); - bool NeedsBBSections = - TM->getBBSectionsType() != llvm::BasicBlockSection::None; - // Machine function splitter uses the basic block sections feature. 
Both - // cannot be enabled at the same time. We do not apply machine function - // splitter if -basic-block-sections is requested. - if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter || - EnableMachineFunctionSplitter)) { + // Machine function splitter uses the basic block sections feature. + // When used along with `-basic-block-sections=`, the basic-block-sections + // feature takes precedence. This means functions eligible for + // basic-block-sections optimizations (`=all`, or `=list=` with function + // included in the list profile) will get that optimization instead. + if (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter) { const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty()) { if (EnableFSDiscriminator) { @@ -1260,7 +1260,8 @@ void TargetPassConfig::addMachinePasses() { } // We run the BasicBlockSections pass if either we need BB sections or BB // address map (or both). - if (NeedsBBSections || TM->Options.BBAddrMap) { + if (TM->getBBSectionsType() != llvm::BasicBlockSection::None || + TM->Options.BBAddrMap) { if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 2e4ce5a2782f7..7663852236594 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1345,18 +1345,6 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, break; } -#ifdef EXPENSIVE_CHECKS - // Go through all the records and verify that CSId has been correctly - // populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we - // would defeat the purpose of OnDiskIterableChainedHashTable. 
- // Note that we can compare CSId against actual call stacks only for - // Version0 and Version1 because IndexedAllocationInfo::CallStack and - // IndexedMemProfRecord::CallSites are not populated in Version2. - if (Version <= memprof::Version1) - for (const auto &Record : MemProfRecordTable->data()) - verifyIndexedMemProfRecord(Record); -#endif - return Error::success(); } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 725ff9256fd4a..d8ab18d213e3d 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -351,9 +351,14 @@ bool InstrProfWriter::addMemProfCallStack( bool InstrProfWriter::addMemProfData(memprof::IndexedMemProfData Incoming, function_ref Warn) { - // TODO: Once we remove support for MemProf format Version V1, assert that - // the three components (frames, call stacks, and records) are either all - // empty or populated. + // Return immediately if everything is empty. + if (Incoming.Frames.empty() && Incoming.CallStacks.empty() && + Incoming.Records.empty()) + return true; + + // Otherwise, every component must be non-empty. 
+ assert(!Incoming.Frames.empty() && !Incoming.CallStacks.empty() && + !Incoming.Records.empty()); if (MemProfData.Frames.empty()) MemProfData.Frames = std::move(Incoming.Frames); @@ -636,7 +641,7 @@ writeMemProfCallStackArray( MemProfCallStackIndexes; memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); for (auto I : Builder.getRadixArray()) OS.write32(I); diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 70741ee4850bd..1c240c3858cc7 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -335,8 +335,7 @@ template LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( const llvm::SmallVector *CallStack, const llvm::SmallVector *Prev, - std::optional> - MemProfFrameIndexes) { + const llvm::DenseMap *MemProfFrameIndexes) { // Compute the length of the common root prefix between Prev and CallStack. uint32_t CommonLen = 0; if (Prev) { @@ -381,8 +380,7 @@ template void CallStackRadixTreeBuilder::build( llvm::MapVector> &&MemProfCallStackData, - std::optional> - MemProfFrameIndexes, + const llvm::DenseMap *MemProfFrameIndexes, llvm::DenseMap &FrameHistogram) { // Take the vector portion of MemProfCallStackData. The vector is exactly // what we need to sort. Also, we no longer need its lookup capability. 
@@ -537,22 +535,5 @@ template llvm::DenseMap computeFrameHistogram( llvm::MapVector> &MemProfCallStackData); - -void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) { - for (const auto &AS : Record.AllocSites) { - assert(AS.CSId == hashCallStack(AS.CallStack)); - (void)AS; - } -} - -void verifyFunctionProfileData( - const llvm::MapVector - &FunctionProfileData) { - for (const auto &[GUID, Record] : FunctionProfileData) { - (void)GUID; - verifyIndexedMemProfRecord(Record); - } -} - } // namespace memprof } // namespace llvm diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index de5b4c23c58a0..6c5cf823fb9e0 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -228,28 +228,6 @@ std::string getBuildIdString(const SegmentEntry &Entry) { } } // namespace -MemProfReader::MemProfReader( - llvm::DenseMap FrameIdMap, - llvm::MapVector ProfData) - : IdToFrame(std::move(FrameIdMap)), - FunctionProfileData(std::move(ProfData)) { - // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord - // while storing CallStack in CSIdToCallStack. 
- for (auto &KV : FunctionProfileData) { - IndexedMemProfRecord &Record = KV.second; - for (auto &AS : Record.AllocSites) { - CallStackId CSId = hashCallStack(AS.CallStack); - AS.CSId = CSId; - CSIdToCallStack.insert({CSId, AS.CallStack}); - } - for (auto &CS : Record.CallSites) { - CallStackId CSId = hashCallStack(CS); - Record.CallSiteIds.push_back(CSId); - CSIdToCallStack.insert({CSId, CS}); - } - } -} - Expected> RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, bool KeepName) { @@ -549,8 +527,6 @@ Error RawMemProfReader::mapRawProfileToRecords() { } } - verifyFunctionProfileData(FunctionProfileData); - return Error::success(); } diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index badaf68ab59cd..3979ca6acaf74 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -206,12 +206,13 @@ Error zstd::decompress(ArrayRef Input, uint8_t *Output, const size_t Res = ::ZSTD_decompress( Output, UncompressedSize, (const uint8_t *)Input.data(), Input.size()); UncompressedSize = Res; + if (ZSTD_isError(Res)) + return make_error(ZSTD_getErrorName(Res), + inconvertibleErrorCode()); // Tell MemorySanitizer that zstd output buffer is fully initialized. // This avoids a false report when running LLVM with uninstrumented ZLib. __msan_unpoison(Output, UncompressedSize); - return ZSTD_isError(Res) ? 
make_error(ZSTD_getErrorName(Res), - inconvertibleErrorCode()) - : Error::success(); + return Error::success(); } Error zstd::decompress(ArrayRef Input, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ed2d9a07cec63..e1be825fcf7bf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4901,13 +4901,14 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, if (IsStrict) { SDValue Val = DAG.getNode(Op.getOpcode(), dl, {F32, MVT::Other}, {Op.getOperand(0), In}); - return DAG.getNode( - ISD::STRICT_FP_ROUND, dl, {Op.getValueType(), MVT::Other}, - {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)}); + return DAG.getNode(ISD::STRICT_FP_ROUND, dl, + {Op.getValueType(), MVT::Other}, + {Val.getValue(1), Val.getValue(0), + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); } return DAG.getNode(ISD::FP_ROUND, dl, Op.getValueType(), DAG.getNode(Op.getOpcode(), dl, F32, In), - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); } uint64_t VTSize = VT.getFixedSizeInBits(); @@ -4919,9 +4920,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, if (IsStrict) { In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, {Op.getOperand(0), In}); - return DAG.getNode( - ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, - {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)}); + return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, + {In.getValue(1), In.getValue(0), + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); } In = DAG.getNode(Opc, dl, CastVT, In); return DAG.getNode(ISD::FP_ROUND, dl, VT, In, @@ -4969,13 +4970,14 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, if (IsStrict) { SDValue Val = DAG.getNode(Op.getOpcode(), dl, {PromoteVT, MVT::Other}, {Op.getOperand(0), SrcVal}); - return DAG.getNode( - ISD::STRICT_FP_ROUND, dl, {Op.getValueType(), MVT::Other}, - 
{Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)}); + return DAG.getNode(ISD::STRICT_FP_ROUND, dl, + {Op.getValueType(), MVT::Other}, + {Val.getValue(1), Val.getValue(0), + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); } return DAG.getNode(ISD::FP_ROUND, dl, Op.getValueType(), DAG.getNode(Op.getOpcode(), dl, PromoteVT, SrcVal), - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); }; if (Op.getValueType() == MVT::bf16) { @@ -5067,12 +5069,13 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, DAG.getNode(ISD::OR, DL, MVT::i64, RoundedBits, NeedsAdjustment); SDValue Adjusted = DAG.getNode(ISD::BITCAST, DL, MVT::f64, AdjustedBits); return IsStrict - ? DAG.getNode(ISD::STRICT_FP_ROUND, DL, - {Op.getValueType(), MVT::Other}, - {Rounded.getValue(1), Adjusted, - DAG.getIntPtrConstant(0, DL)}) + ? DAG.getNode( + ISD::STRICT_FP_ROUND, DL, + {Op.getValueType(), MVT::Other}, + {Rounded.getValue(1), Adjusted, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)}) : DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), Adjusted, - DAG.getIntPtrConstant(0, DL, true)); + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); } } @@ -7109,7 +7112,7 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), FScale, Zero); if (X.getValueType() != XScalarTy) Final = DAG.getNode(ISD::FP_ROUND, DL, XScalarTy, Final, - DAG.getIntPtrConstant(1, SDLoc(Op))); + DAG.getIntPtrConstant(1, SDLoc(Op), /*isTarget=*/true)); return Final; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ec7bb71fd111f..7a1e401bca18c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -266,16 +266,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return false; } - const TargetMachine &TM = getTLI()->getTargetMachine(); - - 
const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - // Inline a callee if its target-features are a subset of the callers - // target-features. - return (CallerBits & CalleeBits) == CalleeBits; + return BaseT::areInlineCompatible(Caller, Callee); } bool AArch64TTIImpl::areTypesABICompatible( diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index c8f01068f7218..ad31f29c04599 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -840,13 +840,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_PTRTOINT) .legalFor({{s64, p0}, {v2s64, v2p0}}) .widenScalarToNextPow2(0, 64) - .clampScalar(0, s64, s64); + .clampScalar(0, s64, s64) + .clampMaxNumElements(0, s64, 2); getActionDefinitionsBuilder(G_INTTOPTR) .unsupportedIf([&](const LegalityQuery &Query) { return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits(); }) - .legalFor({{p0, s64}, {v2p0, v2s64}}); + .legalFor({{p0, s64}, {v2p0, v2s64}}) + .clampMaxNumElements(1, s64, 2); // Casts for 32 and 64-bit width type are just copies. // Same for 128-bit width type, except they are on the FPR bank. 
@@ -1053,7 +1055,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) if (DstTy != SrcTy) return false; return llvm::is_contained( - {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy); + {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy); }) // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we // just want those lowered into G_BUILD_VECTOR @@ -1079,7 +1081,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v8s8, v16s8) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v4s32, v4s32) - .clampNumElements(0, v2s64, v2s64); + .clampNumElements(0, v2s64, v2s64) + .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) { + // Bitcast pointers vector to i64. + const LLT DstTy = Query.Types[0]; + return std::pair(0, LLT::vector(DstTy.getElementCount(), 64)); + }); getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}}) @@ -1296,6 +1303,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v2s32, v4s32) .clampMaxNumElements(0, s64, 2) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) .moreElementsToNextPow2(0) .lower(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index d3543015d667f..83a74b4a43590 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -149,6 +149,12 @@ def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16", "Has v_minimum3_f16 and v_maximum3_f16 instructions" >; +def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16", + "HasMinimum3Maximum3PKF16", + "true", + "Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions" +>; + def FeatureSupportsXNACK : SubtargetFeature<"xnack-support", "SupportsXNACK", "true", @@ -372,10 +378,69 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", "Additional instructions 
for GFX940+" >; +def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap", + "HasPermlane16Swap", + "true", + "Has v_permlane16_swap_b32 instructions" +>; + +def FeaturePermlane32Swap : SubtargetFeature<"permlane32-swap", + "HasPermlane32Swap", + "true", + "Has v_permlane32_swap_b32 instructions" +>; + +def FeatureFP8ConversionScaleInsts : SubtargetFeature<"fp8-cvt-scale-insts", + "HasFP8ConversionScaleInsts", + "true", + "Has fp8 conversion scale instructions" +>; + +def FeatureBF8ConversionScaleInsts : SubtargetFeature<"bf8-cvt-scale-insts", + "HasBF8ConversionScaleInsts", + "true", + "Has bf8 conversion scale instructions" +>; + +def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts", + "HasFP4ConversionScaleInsts", + "true", + "Has fp4 conversion scale instructions" +>; + +def FeatureFP6BF6ConversionScaleInsts : SubtargetFeature<"fp6bf6-cvt-scale-insts", + "HasFP6BF6ConversionScaleInsts", + "true", + "Has fp6 and bf6 conversion scale instructions" +>; + +def FeatureF16BF16ToFP6BF6ConversionScaleInsts : SubtargetFeature<"f16bf16-to-fp6bf6-cvt-scale-insts", + "HasF16BF16ToFP6BF6ConversionScaleInsts", + "true", + "Has f16bf16 to fp6bf6 conversion scale instructions" +>; + +def FeatureAshrPkInsts : SubtargetFeature<"ashr-pk-insts", + "HasAshrPkInsts", + "true", + "Has Arithmetic Shift Pack instructions" +>; + def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", "GFX950Insts", "true", - "Additional instructions for GFX950+" + "Additional instructions for GFX950+", + [FeaturePermlane16Swap, + FeaturePermlane32Swap, + FeatureAshrPkInsts, + FeatureFP8ConversionScaleInsts, + FeatureBF8ConversionScaleInsts, + FeatureFP4ConversionScaleInsts, + FeatureFP6BF6ConversionScaleInsts, + FeatureF16BF16ToFP6BF6ConversionScaleInsts, + FeatureMinimum3Maximum3F32, + FeatureMinimum3Maximum3PKF16 + ] >; def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", @@ -676,7 +741,7 @@ def FeatureDot8Insts : SubtargetFeature<"dot8-insts", def FeatureDot9Insts : 
SubtargetFeature<"dot9-insts", "HasDot9Insts", "true", - "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions" + "Has v_dot2_f16_f16, v_dot2_bf16_bf16 instructions" >; def FeatureDot10Insts : SubtargetFeature<"dot10-insts", @@ -691,6 +756,19 @@ def FeatureDot11Insts : SubtargetFeature<"dot11-insts", "Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions" >; +def FeatureDot12Insts : SubtargetFeature<"dot12-insts", + "HasDot12Insts", + "true", + "Has v_dot2_f32_bf16 instructions" +>; + +def FeatureDot13Insts : SubtargetFeature<"dot13-insts", + "HasDot13Insts", + "true", + "Has v_dot2c_f32_bf16 instructions" +>; + + def FeatureMAIInsts : SubtargetFeature<"mai-insts", "HasMAIInsts", "true", @@ -984,6 +1062,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order", "VMEM instructions of the same type write VGPR results in order" >; +def FeatureBitOp3Insts : SubtargetFeature<"bitop3-insts", + "HasBitOp3Insts", + "true", + "Has v_bitop3_b32/v_bitop3_b16 instructions" +>; + def FeaturePrngInst : SubtargetFeature<"prng-inst", "HasPrngInst", "true", @@ -1511,7 +1595,15 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureCvtFP8VOP1Bug, FeatureGFX950Insts, FeaturePrngInst, - FeatureBF16ConversionInsts + FeatureBF16ConversionInsts, + FeatureBitOp3Insts, + FeatureFP8ConversionScaleInsts, + FeatureBF8ConversionScaleInsts, + FeatureFP4ConversionScaleInsts, + FeatureFP6BF6ConversionScaleInsts, + FeatureDot12Insts, + FeatureDot13Insts, + FeatureAtomicBufferPkAddBF16Inst ])>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1639,6 +1731,7 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureDot8Insts, FeatureDot9Insts, FeatureDot10Insts, + FeatureDot12Insts, FeatureNSAEncoding, FeaturePartialNSAEncoding, FeatureShaderCyclesRegister, @@ -1722,6 +1815,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDot9Insts, FeatureDot10Insts, FeatureDot11Insts, + FeatureDot12Insts, FeatureNSAEncoding, 
FeaturePartialNSAEncoding, FeatureShaderCyclesHiLoRegisters, @@ -1987,6 +2081,14 @@ def HasGFX950Insts : Predicate<"Subtarget->hasGFX950Insts()">, AssemblerPredicate<(all_of FeatureGFX950Insts)>; +def HasPermlane16Swap : + Predicate<"Subtarget->hasPermlane16Swap()">, + AssemblerPredicate<(all_of FeaturePermlane16Swap)>; + +def HasPermlane32Swap : + Predicate<"Subtarget->hasPermlane32Swap()">, + AssemblerPredicate<(all_of FeaturePermlane32Swap)>; + def isGFX8GFX9NotGFX940 : Predicate<"!Subtarget->hasGFX940Insts() &&" "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" @@ -2051,6 +2153,10 @@ def HasMinimum3Maximum3F16 : Predicate<"Subtarget->hasMinimum3Maximum3F16()">, AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>; +def HasMinimum3Maximum3PKF16 : + Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">, + AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>; + def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; @@ -2286,6 +2392,12 @@ def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">, def HasDot11Insts : Predicate<"Subtarget->hasDot11Insts()">, AssemblerPredicate<(all_of FeatureDot11Insts)>; +def HasDot12Insts : Predicate<"Subtarget->hasDot12Insts()">, + AssemblerPredicate<(all_of FeatureDot12Insts)>; + +def HasDot13Insts : Predicate<"Subtarget->hasDot13Insts()">, + AssemblerPredicate<(all_of FeatureDot13Insts)>; + def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">, AssemblerPredicate<(all_of FeatureGetWaveIdInst)>; @@ -2371,9 +2483,27 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">, def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">, AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>; +def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">, + AssemblerPredicate<(all_of FeatureBitOp3Insts)>; + def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">, AssemblerPredicate<(all_of 
FeaturePrngInst)>; +def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>; + +def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>; + +def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>; + +def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>; + +def HasF16BF16ToFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasF16BF16ToFP6BF6ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureF16BF16ToFP6BF6ConversionScaleInsts)>; + def HasGDS : Predicate<"Subtarget->hasGDS()">; def HasGWS : Predicate<"Subtarget->hasGWS()">; @@ -2388,6 +2518,9 @@ def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">; def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">, AssemblerPredicate<(all_of FeatureXF32Insts)>; +def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">, + AssemblerPredicate<(all_of FeatureAshrPkInsts)>; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 88fa96bd049f2..1b909568fc555 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -395,6 +395,9 @@ def gi_as_i8timm : GICustomOperandRenderer<"renderTruncTImm">, def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm">, GISDNodeXFormEquiv; +def gi_as_i1timm_zext : GICustomOperandRenderer<"renderZextBoolTImm">, + GISDNodeXFormEquiv; + def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, GISDNodeXFormEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
index 151d56292b53d..7d78e9cd7eab6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -408,7 +408,8 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { unsigned AS = cast(N)->getAddressSpace(); if (AS == AMDGPUAS::LOCAL_ADDRESS) { if (Subtarget->ldsRequiresM0Init()) - return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); + return glueCopyToM0( + N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32)); } else if (AS == AMDGPUAS::REGION_ADDRESS) { MachineFunction &MF = CurDAG->getMachineFunction(); unsigned Value = MF.getInfo()->getGDSSize(); @@ -1724,7 +1725,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr, } VAddr = Addr; - Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); + Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32); return true; } @@ -1832,7 +1833,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, } if (SAddr) { - Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); + Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32); return true; } } @@ -1848,7 +1849,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32, CurDAG->getTargetConstant(0, SDLoc(), MVT::i32)); VOffset = SDValue(VMov, 0); - Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); + Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32); return true; } @@ -1903,13 +1904,13 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr, SDValue AddOffset = SAddr.getOpcode() == ISD::TargetFrameIndex ? 
getMaterializedScalarImm32(Lo_32(RemainderOffset), DL) - : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32); + : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32); SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32, SAddr, AddOffset), 0); } - Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32); + Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32); return true; } @@ -2058,7 +2059,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, std::optional EncodedOffset = AMDGPU::getSMRDEncodedOffset( *Subtarget, ByteOffset, IsBuffer, HasSOffset); if (EncodedOffset && Offset && !Imm32Only) { - *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } @@ -2777,6 +2778,31 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { case Intrinsic::amdgcn_interp_p1_f16: SelectInterpP1F16(N); return; + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: { + if ((IntrID == Intrinsic::amdgcn_permlane16_swap && + !Subtarget->hasPermlane16Swap()) || + (IntrID == Intrinsic::amdgcn_permlane32_swap && + !Subtarget->hasPermlane32Swap())) { + SelectCode(N); // Hit the default error + return; + } + + Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap + ? AMDGPU::V_PERMLANE16_SWAP_B32_e64 + : AMDGPU::V_PERMLANE32_SWAP_B32_e64; + + SmallVector NewOps(N->op_begin() + 1, N->op_end()); + if (ConvGlueNode) + NewOps.push_back(SDValue(ConvGlueNode, 0)); + + bool FI = N->getConstantOperandVal(3); + NewOps[2] = CurDAG->getTargetConstant( + FI ? 
AMDGPU::DPP::DPP_FI_1 : AMDGPU::DPP::DPP_FI_0, SDLoc(), MVT::i32); + + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps); + return; + } default: SelectCode(N); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3cc4bd92f6471..d77508227b076 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2333,7 +2333,7 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, SDValue RHS = Op.getOperand(1); SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue NegOne = DAG.getConstant(-1, DL, VT); + SDValue NegOne = DAG.getAllOnesConstant(DL, VT); if (VT == MVT::i32) { if (SDValue Res = LowerDIVREM24(Op, DAG, true)) @@ -3794,7 +3794,11 @@ static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, if (Width + Offset < 32) { uint32_t Shl = static_cast(Src0) << (32 - Offset - Width); IntTy Result = static_cast(Shl) >> (32 - Width); - return DAG.getConstant(Result, DL, MVT::i32); + if constexpr (std::is_signed_v) { + return DAG.getSignedConstant(Result, DL, MVT::i32); + } else { + return DAG.getConstant(Result, DL, MVT::i32); + } } return DAG.getConstant(Src0 >> Offset, DL, MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 087de1bed86f7..18a09c39a0638 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1024,6 +1024,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } break; } + case Intrinsic::amdgcn_wavefrontsize: { + if (ST->isWaveSizeKnown()) + return IC.replaceInstUsesWith( + II, ConstantInt::get(II.getType(), ST->getWavefrontSize())); + break; + } case Intrinsic::amdgcn_wqm_vote: { // wqm_vote is identity when the argument is constant. 
if (!isa(II.getArgOperand(0))) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 702f6e67c5527..bec294a945d2f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -100,8 +100,8 @@ def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN", >; def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP", - SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>, - [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue] + SDTypeProfile<0, 1, [SDTCisVT<0, i16>]>, + [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue] >; def AMDGPUconstdata_ptr : SDNode< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 6dbe1fa62e748..39bec6c7f2f56 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1105,6 +1105,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8: case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: return selectSMFMACIntrin(I); + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: + return selectPermlaneSwapIntrin(I, IntrinsicID); default: return selectImpl(I, *CoverageInfo); } @@ -3581,6 +3584,29 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { return true; } +bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin( + MachineInstr &MI, Intrinsic::ID IntrID) const { + if (IntrID == Intrinsic::amdgcn_permlane16_swap && + !Subtarget->hasPermlane16Swap()) + return false; + if (IntrID == Intrinsic::amdgcn_permlane32_swap && + !Subtarget->hasPermlane32Swap()) + return false; + + unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap + ? 
AMDGPU::V_PERMLANE16_SWAP_B32_e64 + : AMDGPU::V_PERMLANE32_SWAP_B32_e64; + + MI.removeOperand(2); + MI.setDesc(TII.get(Opcode)); + MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + + MachineOperand &FI = MI.getOperand(4); + FI.setImm(FI.getImm() ? AMDGPU::DPP::DPP_FI_1 : AMDGPU::DPP::DPP_FI_0); + + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -5769,6 +5795,12 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB, MIB.addImm(Op.getImm()); } +void AMDGPUInstructionSelector::renderZextBoolTImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + MIB.addImm(MI.getOperand(OpIdx).getImm() != 0); +} + void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 563e40267f04b..5b31cb827c971 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -145,6 +145,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool selectGlobalLoadLds(MachineInstr &MI) const; bool selectBVHIntrinsic(MachineInstr &I) const; bool selectSMFMACIntrin(MachineInstr &I) const; + bool selectPermlaneSwapIntrin(MachineInstr &I, Intrinsic::ID IntrID) const; bool selectWaveAddress(MachineInstr &I) const; bool selectStackRestore(MachineInstr &MI) const; bool selectNamedBarrierInit(MachineInstr &I, Intrinsic::ID IID) const; @@ -328,6 +329,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector { void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; + void renderZextBoolTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) 
const; void renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 2d12b0c316a53..b06bd4e334614 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3146,6 +3146,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_interp_inreg_p2_f16: case Intrinsic::amdgcn_interp_p10_rtz_f16: case Intrinsic::amdgcn_interp_p2_rtz_f16: + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: applyDefaultMapping(OpdMapper); return; case Intrinsic::amdgcn_permlane16: @@ -4526,6 +4528,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_fdot2_bf16_bf16: case Intrinsic::amdgcn_fdot2_f16_f16: case Intrinsic::amdgcn_fdot2_f32_bf16: + case Intrinsic::amdgcn_fdot2c_f32_bf16: case Intrinsic::amdgcn_sudot4: case Intrinsic::amdgcn_sudot8: case Intrinsic::amdgcn_dot4_f32_fp8_bf8: @@ -4540,6 +4543,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_pk_bf8_f32: case Intrinsic::amdgcn_cvt_sr_fp8_f32: case Intrinsic::amdgcn_cvt_sr_bf8_f32: + case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16: + case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16: + case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16: + case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16: + case Intrinsic::amdgcn_ashr_pk_i8_i32: + case Intrinsic::amdgcn_ashr_pk_u8_i32: + case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32: + case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32: case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16: case Intrinsic::amdgcn_wmma_f16_16x16x16_f16: case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied: @@ -4860,6 +4871,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 
32); break; } + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] = + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); + break; + } case Intrinsic::amdgcn_ballot: { unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); @@ -4958,6 +4976,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_global_atomic_ordered_add_b64: case Intrinsic::amdgcn_global_load_tr_b64: case Intrinsic::amdgcn_global_load_tr_b128: + case Intrinsic::amdgcn_ds_read_tr4_b64: + case Intrinsic::amdgcn_ds_read_tr6_b96: + case Intrinsic::amdgcn_ds_read_tr8_b64: + case Intrinsic::amdgcn_ds_read_tr16_b64: return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 2ea254e64b8cb..10175557fadc7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -326,6 +326,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; foreach intr = AMDGPUMFMAIntrinsics908 in def : SourceOfDivergence; @@ -343,6 +345,11 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; + // The dummy boolean output is divergent from the IR's perspective, // but the mask results are uniform. These produce a divergent and // uniform result, so the returned struct is collectively divergent. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index ece26a4adb375..742f4e6e80f1a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -50,6 +50,11 @@ class AMDGPUSubtarget { bool GCN3Encoding = false; bool Has16BitInsts = false; bool HasTrue16BitInsts = false; + bool HasFP8ConversionScaleInsts = false; + bool HasBF8ConversionScaleInsts = false; + bool HasFP4ConversionScaleInsts = false; + bool HasFP6BF6ConversionScaleInsts = false; + bool HasF16BF16ToFP6BF6ConversionScaleInsts = false; bool EnableRealTrue16Insts = false; bool HasBF16ConversionInsts = false; bool HasMadMixInsts = false; @@ -175,6 +180,16 @@ class AMDGPUSubtarget { return HasMadMixInsts; } + bool hasFP8ConversionScaleInsts() const { return HasFP8ConversionScaleInsts; } + + bool hasBF8ConversionScaleInsts() const { return HasBF8ConversionScaleInsts; } + + bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; } + + bool hasFP6BF6ConversionScaleInsts() const { return HasFP6BF6ConversionScaleInsts; } + + bool hasF16BF16ToFP6BF6ConversionScaleInsts() const { return HasF16BF16ToFP6BF6ConversionScaleInsts; } + bool hasMadMacF32Insts() const { return HasMadMacF32Insts || !isGCN(); } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 34bdab39d367b..afd35842ba87f 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -171,6 +171,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { ImmTyWaitVAVDst, ImmTyWaitVMVSrc, ImmTyByteSel, + ImmTyBitOp3, }; // Immediate operand kind. 
@@ -410,6 +411,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } bool isNegLo() const { return isImmTy(ImmTyNegLo); } bool isNegHi() const { return isImmTy(ImmTyNegHi); } + bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); } bool isRegOrImm() const { return isReg() || isImm(); @@ -1138,6 +1140,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; case ImmTyByteSel: OS << "ByteSel" ; break; + case ImmTyBitOp3: OS << "BitOp3"; break; } // clang-format on } @@ -1913,6 +1916,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser { ParseStatus parseEndpgm(OperandVector &Operands); ParseStatus parseVOPD(OperandVector &Operands); + + ParseStatus parseBitOp3(OperandVector &Operands); + AMDGPUOperand::Ptr defaultBitOp3() const; }; } // end anonymous namespace @@ -8818,7 +8824,9 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; - if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || + if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi || + Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi || + Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { @@ -8841,6 +8849,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, Inst.addOperand(Inst.getOperand(0)); } + int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3); + if (BitOp3Idx != -1) { + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3); + } + // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 // instruction, and then figure out where to actually put the modifiers @@ -9748,6 +9761,20 @@ ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } +//===----------------------------------------------------------------------===// +// BITOP3 +//===----------------------------------------------------------------------===// + +ParseStatus AMDGPUAsmParser::parseBitOp3(OperandVector &Operands) { + ParseStatus Res = + parseIntWithPrefix("bitop3", Operands, AMDGPUOperand::ImmTyBitOp3); + return Res; +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBitOp3() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBitOp3); +} + //===----------------------------------------------------------------------===// // Split Barrier //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index e5978aee2b39a..a288c58def5cb 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -3296,6 +3296,8 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; +defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_vi <0x52>; + defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>; let SubtargetPredicate = isGFX90APlus in { diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 061ffda2498f4..7cbd6d2dc6209 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -294,6 +294,12 @@ multiclass DS_1A_RET_mc { + let has_m0_read = 0 in { + def "" : DS_1A_RET; + } +} + class DS_1A_RET_Tied : DS_1A_RET; @@ -744,6 +750,13 @@ multiclass DSAtomicRetNoRetPatIntrinsic_mc; } // let SubtargetPredicate = isGFX12Plus +let WaveSizePredicate = 
isWave64, SubtargetPredicate = HasGFX950Insts, mayStore = 0 in { + defm DS_READ_B64_TR_B4 : DS_1A_RET_NoM0<"ds_read_b64_tr_b4", VReg_64>; + defm DS_READ_B64_TR_B8 : DS_1A_RET_NoM0<"ds_read_b64_tr_b8", VReg_64>; + defm DS_READ_B64_TR_B16 : DS_1A_RET_NoM0<"ds_read_b64_tr_b16", VReg_64>; + defm DS_READ_B96_TR_B6 : DS_1A_RET_NoM0<"ds_read_b96_tr_b6", VReg_96>; +} + //===----------------------------------------------------------------------===// // DS Patterns //===----------------------------------------------------------------------===// @@ -1179,6 +1192,18 @@ def : GCNPat < sub0) >; +class DSLoadTrPat : GCNPat < + (vt (node (DS1Addr1Offset i32:$ptr, i32:$offset))), + (inst $ptr, Offset:$offset, (i1 0)) +>; + +let SubtargetPredicate = HasGFX950Insts in { + def : DSLoadTrPat ; + def : DSLoadTrPat ; + def : DSLoadTrPat ; + def : DSLoadTrPat ; +} + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// @@ -1748,3 +1773,11 @@ def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>; def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>; def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>; def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>; + +//===----------------------------------------------------------------------===// +// GFX950. 
+//===----------------------------------------------------------------------===// +def DS_READ_B64_TR_B4_vi : DS_Real_vi<0x0e0, DS_READ_B64_TR_B4>; +def DS_READ_B96_TR_B6_vi : DS_Real_vi<0x0e1, DS_READ_B96_TR_B6>; +def DS_READ_B64_TR_B8_vi : DS_Real_vi<0x0e2, DS_READ_B64_TR_B8>; +def DS_READ_B64_TR_B16_vi : DS_Real_vi<0x0e3, DS_READ_B64_TR_B16>; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index f90121a86c846..5908351805721 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -45,26 +45,10 @@ using namespace llvm; using DecodeStatus = llvm::MCDisassembler::DecodeStatus; -static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI, - MCContext &Ctx) { - if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) && - !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) { - MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI); - // If there is no default wave size it must be a generation before gfx10, - // these have FeatureWavefrontSize64 in their definition already. For gfx10+ - // set wave32 as a default. - STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32); - return STICopy; - } - - return STI; -} - AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) - : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII), - MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()), - TargetMaxInstBytes(MAI.getMaxInstLength(&STI)), + : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), + MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)), CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) { // ToDo: AMDGPUDisassembler supports only VI ISA. 
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus()) @@ -278,6 +262,7 @@ DECODE_OPERAND_REG_8(VGPR_32_Lo128) DECODE_OPERAND_REG_8(VReg_64) DECODE_OPERAND_REG_8(VReg_96) DECODE_OPERAND_REG_8(VReg_128) +DECODE_OPERAND_REG_8(VReg_192) DECODE_OPERAND_REG_8(VReg_256) DECODE_OPERAND_REG_8(VReg_288) DECODE_OPERAND_REG_8(VReg_352) @@ -581,6 +566,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS)) break; + if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) && + tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS)) + break; + // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and // v_mad_mixhi_f16 for FMA variants. Try to decode using this special // table first so we print the correct name. @@ -642,6 +631,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS)) break; + if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) && + tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS)) + break; + if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) && tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS)) break; @@ -1546,6 +1539,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const { case OPWV232: return VReg_64RegClassID; case OPW96: return VReg_96RegClassID; case OPW128: return VReg_128RegClassID; + case OPW192: return VReg_192RegClassID; case OPW160: return VReg_160RegClassID; case OPW256: return VReg_256RegClassID; case OPW288: return VReg_288RegClassID; @@ -1842,28 +1836,28 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { STI.hasFeature(AMDGPU::FeatureGFX10)) && "SDWAVopcDst should be present only on GFX9+"); - bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64); + bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32); if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= 
SDWA9EncValues::VOPC_DST_SGPR_MASK; int TTmpIdx = getTTmpIdx(Val); if (TTmpIdx >= 0) { - auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32); + auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64); return createSRegOperand(TTmpClsId, TTmpIdx); } if (Val > SGPR_MAX) { - return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val); + return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val); } - return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val); + return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val); } - return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO); + return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC); } MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const { - return STI.hasFeature(AMDGPU::FeatureWavefrontSize64) - ? decodeSrcOp(OPW64, Val) - : decodeSrcOp(OPW32, Val); + return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) + ? decodeSrcOp(OPW32, Val) + : decodeSrcOp(OPW64, Val); } MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 3e20a2ab9e66c..b19e4b74a394c 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -219,6 +219,7 @@ class AMDGPUDisassembler : public MCDisassembler { OPW96, OPW128, OPW160, + OPW192, OPW256, OPW288, OPW320, diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 44afccb0690d0..4c37ef8855a5b 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) { Opcode == AMDGPU::V_PERMLANE64_B32 || Opcode == AMDGPU::V_PERMLANEX16_B32_e64 || Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 || - Opcode == 
AMDGPU::V_PERMLANEX16_VAR_B32_e64; + Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 || + Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 || + Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 || + Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 || + Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64; } static bool isLdsDma(const MachineInstr &MI) { @@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { SIInstrInfo::isDS(*MI)) return std::max(WaitStates, checkMAILdStHazards(MI)); + if (ST.hasGFX950Insts() && isPermlane(*MI)) + return std::max(WaitStates, checkPermlaneHazards(MI)); + return WaitStates; } @@ -1200,6 +1207,13 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixRequiredExportPriority(MI); } +static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI, + const MachineInstr &MI) { + return (TII.isVOPC(MI) || + (MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)))) && + MI.modifiesRegister(AMDGPU::EXEC, &TRI); +} + bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) return false; @@ -1207,9 +1221,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); auto IsHazardFn = [TII, TRI](const MachineInstr &MI) { - return (TII->isVOPC(MI) || - ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) && - MI.modifiesRegister(AMDGPU::EXEC, TRI); + return isVCmpXWritesExec(*TII, *TRI, MI); }; auto IsExpiredFn = [](const MachineInstr &MI, int) { @@ -2232,12 +2244,25 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } static int -GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { - // 2 pass -> 3 - // 4 pass -> 5 - // 8 pass -> 9 - // 16 pass -> 17 - return NumPasses + 1; +GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + 
// 2 pass | 3 4 + // 4 pass | 5 6 + // 8 pass | 9 10 + // 16 pass | 17 18 + return NumPasses + 1 + IsGFX950; +} + +static int +GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + // 2 pass | 3 3 + // 4 pass | 5 6 + // 8 pass | 9 10 + // 16 pass | 17 18 + return NumPasses + 1 + (NumPasses != 2 && IsGFX950); } static int @@ -2300,12 +2325,14 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9; const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17; const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9; + const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17; const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4; const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; + const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; @@ -2357,7 +2384,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: if (!isXDL(ST, *MI)) - NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates; + NeedWaitStates = + ST.hasGFX950Insts() + ? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates + : DMFMA16x16WritesVGPROverlappedSrcCWaitStates; break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: @@ -2372,8 +2402,11 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = isXDL(ST, *MI1) - ? 
GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( - NumPasses) + ? (isXDL(ST, *MI) + ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( + NumPasses, ST.hasGFX950Insts()) + : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( + NumPasses, ST.hasGFX950Insts())) : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( NumPasses); break; @@ -2408,7 +2441,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: - NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; + NeedWaitStates = + ST.hasGFX950Insts() + ? GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates + : DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: @@ -2505,6 +2541,46 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { return WaitStatesNeeded; } +int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) { + assert(!ST.hasVcmpxPermlaneHazard() && + "this is a different vcmpx+permlane hazard"); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) { + return isVCmpXWritesExec(*TII, *TRI, MI); + }; + + auto IsVALUFn = [](const MachineInstr &MI) { + return SIInstrInfo::isVALU(MI); + }; + + const int VCmpXWritesExecWaitStates = 4; + const int VALUWritesVDstWaitStates = 2; + int WaitStatesNeeded = 0; + + for (const MachineOperand &Op : MI->explicit_uses()) { + if (!Op.isReg() || !TRI->isVGPR(MF.getRegInfo(), Op.getReg())) + continue; + Register Reg = Op.getReg(); + + int WaitStatesSinceDef = + VALUWritesVDstWaitStates - + getWaitStatesSinceDef(Reg, IsVALUFn, + /*MaxWaitStates=*/VALUWritesVDstWaitStates); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesSinceDef); + if 
(WaitStatesNeeded >= VALUWritesVDstWaitStates) + break; + } + + int VCmpXHazardWaits = + VCmpXWritesExecWaitStates - + getWaitStatesSince(IsVCmpXWritesExecFn, VCmpXWritesExecWaitStates); + + WaitStatesNeeded = std::max(WaitStatesNeeded, VCmpXHazardWaits); + return WaitStatesNeeded; +} + static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 @@ -2603,6 +2679,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; const int DMFMA16x16WriteVgprVALUReadWaitStates = 11; + const int GFX950_DMFMA16x16WriteVgprVALUReadWaitStates = 19; const int DotWriteSameDotReadSrcAB = 3; const int DotWriteDifferentVALURead = 3; const int DMFMABetweenVALUWriteVMEMRead = 2; @@ -2663,9 +2740,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { break; case 8: case 16: - NeedWaitStates = IsMemOrExport - ? DMFMA16x16WriteVgprMemExpReadWaitStates - : DMFMA16x16WriteVgprVALUReadWaitStates; + NeedWaitStates = + IsMemOrExport + ? DMFMA16x16WriteVgprMemExpReadWaitStates + : (ST.hasGFX950Insts() + ? 
GFX950_DMFMA16x16WriteVgprVALUReadWaitStates + : DMFMA16x16WriteVgprVALUReadWaitStates); break; default: llvm_unreachable("unexpected dgemm"); diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index adb2278c48eeb..83ce100c58f0a 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkMFMAPadding(MachineInstr *MI); int checkMAIVALUHazards(MachineInstr *MI); int checkMAILdStHazards(MachineInstr *MI); + int checkPermlaneHazards(MachineInstr *MI); public: GCNHazardRecognizer(const MachineFunction &MF); diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 3403cbab526d4..a86c76bb6075e 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,11 +9,11 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. 
def : ProcessorModel<"generic", NoSchedModel, - [FeatureWavefrontSize64, FeatureGDS, FeatureGWS] + [FeatureGDS, FeatureGWS] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace] + [FeatureGDS, FeatureGWS, FeatureFlatAddressSpace] >; //===------------------------------------------------------------===// @@ -204,7 +204,7 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, FeatureISAVersion9_4_2.Features >; -def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel, +def : ProcessorModel<"gfx950", SIDPGFX950FullSpeedModel, FeatureISAVersion9_5_0.Features >; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp index 6233ca2eb4f1d..51361b7594056 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp @@ -100,14 +100,16 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, if (Gen == AMDGPUSubtarget::INVALID) { Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS : AMDGPUSubtarget::SOUTHERN_ISLANDS; - } - - if (!hasFeature(AMDGPU::FeatureWavefrontSize32) && - !hasFeature(AMDGPU::FeatureWavefrontSize64)) { + // Assume wave64 for the unknown target, if not explicitly set. + if (getWavefrontSizeLog2() == 0) + WavefrontSizeLog2 = 6; + } else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) && + !hasFeature(AMDGPU::FeatureWavefrontSize64)) { // If there is no default wave size it must be a generation before gfx10, // these have FeatureWavefrontSize64 in their definition already. For gfx10+ // set wave32 as a default. ToggleFeature(AMDGPU::FeatureWavefrontSize32); + WavefrontSizeLog2 = getGeneration() >= AMDGPUSubtarget::GFX10 ? 5 : 6; } // We don't support FP64 for EG/NI atm. 
@@ -147,10 +149,6 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, !getFeatureBits().test(AMDGPU::FeatureCuMode)) LocalMemorySize *= 2; - // Don't crash on invalid devices. - if (WavefrontSizeLog2 == 0) - WavefrontSizeLog2 = 5; - HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; HasSMulHi = getGeneration() >= AMDGPUSubtarget::GFX9; @@ -166,7 +164,7 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, void GCNSubtarget::checkSubtargetFeatures(const Function &F) const { LLVMContext &Ctx = F.getContext(); - if (hasFeature(AMDGPU::FeatureWavefrontSize32) == + if (hasFeature(AMDGPU::FeatureWavefrontSize32) && hasFeature(AMDGPU::FeatureWavefrontSize64)) { Ctx.diagnose(DiagnosticInfoUnsupported( F, "must specify exactly one of wavefrontsize32 and wavefrontsize64")); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f3f96940c1f44..ea5e159fdd836 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -156,6 +156,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasDot9Insts = false; bool HasDot10Insts = false; bool HasDot11Insts = false; + bool HasDot12Insts = false; + bool HasDot13Insts = false; bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; @@ -220,7 +222,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasSALUFloatInsts = false; bool HasPseudoScalarTrans = false; bool HasRestrictedSOffset = false; + bool HasBitOp3Insts = false; bool HasPrngInst = false; + bool HasPermlane16Swap = false; + bool HasPermlane32Swap = false; bool HasVcmpxPermlaneHazard = false; bool HasVMEMtoScalarWriteHazard = false; bool HasSMEMtoVectorWriteHazard = false; @@ -242,8 +247,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasForceStoreSC0SC1 = false; bool HasRequiredExportPriority = false; bool HasVmemWriteVgprInOrder = false; 
+ bool HasAshrPkInsts = false; bool HasMinimum3Maximum3F32 = false; bool HasMinimum3Maximum3F16 = false; + bool HasMinimum3Maximum3PKF16 = false; + bool RequiresCOV6 = false; // Dummy feature to use for assembler in tablegen. @@ -820,6 +828,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return HasDot11Insts; } + bool hasDot12Insts() const { + return HasDot12Insts; + } + + bool hasDot13Insts() const { + return HasDot13Insts; + } + bool hasMAIInsts() const { return HasMAIInsts; } @@ -1319,6 +1335,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// \returns true if the target has instructions with xf32 format support. bool hasXF32Insts() const { return HasXF32Insts; } + bool hasBitOp3Insts() const { return HasBitOp3Insts; } + + bool hasPermlane16Swap() const { return HasPermlane16Swap; } + bool hasPermlane32Swap() const { return HasPermlane32Swap; } + bool hasAshrPkInsts() const { return HasAshrPkInsts; } + bool hasMinimum3Maximum3F32() const { return HasMinimum3Maximum3F32; } @@ -1327,6 +1349,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return HasMinimum3Maximum3F16; } + bool hasMinimum3Maximum3PKF16() const { + return HasMinimum3Maximum3PKF16; + } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. @@ -1564,6 +1590,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getWavefrontSize() == 64; } + /// Returns if the wavesize of this subtarget is known reliable. This is false + /// only for the a default target-cpu that does not have an explicit + /// +wavefrontsize target feature. 
+ bool isWaveSizeKnown() const { + return hasFeature(AMDGPU::FeatureWavefrontSize32) || + hasFeature(AMDGPU::FeatureWavefrontSize64); + } + const TargetRegisterClass *getBoolRC() const { return getRegisterInfo()->getBoolRC(); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 344028c4b4868..c389f3a13d952 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -649,9 +649,9 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand, raw_ostream &O) { if (!FirstOperand) O << ", "; - printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64) - ? AMDGPU::VCC - : AMDGPU::VCC_LO, + printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize32) + ? AMDGPU::VCC_LO + : AMDGPU::VCC, O, MRI); if (FirstOperand) O << ", "; @@ -1714,4 +1714,18 @@ void AMDGPUInstPrinter::printNamedInt(const MCInst *MI, unsigned OpNo, O << ' ' << Prefix << ':' << (PrintInHex ? 
formatHex(V) : formatDec(V)); } +void AMDGPUInstPrinter::printBitOp3(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint8_t Imm = MI->getOperand(OpNo).getImm(); + if (!Imm) + return; + + O << " bitop3:"; + if (Imm <= 10) + O << formatDec(Imm); + else + O << formatHex(static_cast(Imm)); +} + #include "AMDGPUGenAsmWriter.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 5a7d6cf7ba595..071e0a9d0fee6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -163,6 +163,9 @@ class AMDGPUInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O, StringRef Prefix, bool PrintInHex, bool AlwaysPrint); + void printBitOp3(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + public: static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm, StringRef Default = ""); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 29be64625811f..c692895d84c00 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -77,7 +77,22 @@ static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (TT.getArch() == Triple::r600) return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); - return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + + MCSubtargetInfo *STI = + createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + + // FIXME: We should error for the default target. 
+ if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) && + !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) { + // If there is no default wave size it must be a generation before gfx10, + // these have FeatureWavefrontSize64 in their definition already. For gfx10+ + // set wave32 as a default. + STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI) + ? AMDGPU::FeatureWavefrontSize32 + : AMDGPU::FeatureWavefrontSize64); + } + + return STI; } static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 1b88fdd3ab2e1..c2e952418f1be 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -919,7 +919,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT); HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT); } else if (CompareVT == MVT::i32) { - HWTrue = DAG.getConstant(-1, DL, CompareVT); + HWTrue = DAG.getAllOnesConstant(DL, CompareVT); HWFalse = DAG.getConstant(0, DL, CompareVT); } else { @@ -949,7 +949,7 @@ SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op, unsigned DestAS = ASC->getDestAddressSpace(); if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS) - return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT); + return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT); return Op; } @@ -1750,11 +1750,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0), - SelectCC.getOperand(0), // LHS - SelectCC.getOperand(1), // RHS - DAG.getConstant(-1, DL, MVT::i32), // True - DAG.getConstant(0, DL, MVT::i32), // False - SelectCC.getOperand(4)); // CC + SelectCC.getOperand(0), // LHS + SelectCC.getOperand(1), // RHS + DAG.getAllOnesConstant(DL, MVT::i32), // True + DAG.getConstant(0, DL, MVT::i32), // False + SelectCC.getOperand(4)); 
// CC } // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e520dfff1016b..f326416a32417 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1382,7 +1382,11 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::amdgcn_global_load_tr_b64: - case Intrinsic::amdgcn_global_load_tr_b128: { + case Intrinsic::amdgcn_global_load_tr_b128: + case Intrinsic::amdgcn_ds_read_tr4_b64: + case Intrinsic::amdgcn_ds_read_tr6_b96: + case Intrinsic::amdgcn_ds_read_tr8_b64: + case Intrinsic::amdgcn_ds_read_tr16_b64: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); @@ -1477,6 +1481,10 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, case Intrinsic::amdgcn_atomic_cond_sub_u32: case Intrinsic::amdgcn_ds_append: case Intrinsic::amdgcn_ds_consume: + case Intrinsic::amdgcn_ds_read_tr4_b64: + case Intrinsic::amdgcn_ds_read_tr6_b96: + case Intrinsic::amdgcn_ds_read_tr8_b64: + case Intrinsic::amdgcn_ds_read_tr16_b64: case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_flat_atomic_fmax_num: @@ -4019,10 +4027,11 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op, Align StackAlign = TFL->getStackAlign(); Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value if (Alignment && *Alignment > StackAlign) { - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Alignment->value() - << Subtarget->getWavefrontSizeLog2(), - dl, VT)); + Tmp1 = DAG.getNode( + ISD::AND, dl, VT, Tmp1, + DAG.getSignedConstant(-(uint64_t)Alignment->value() + << Subtarget->getWavefrontSizeLog2(), + dl, VT)); } Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain @@ -6771,10 +6780,10 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, 
SelectionDAG &DAG) const { // TODO: This should be a generic narrowing legalization, and can easily be // for GlobalISel. - SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT); + SDValue MinExp = DAG.getSignedConstant(minIntN(16), DL, ExpVT); SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp); - SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT); + SDValue MaxExp = DAG.getSignedConstant(maxIntN(16), DL, ExpVT); SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp); SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp); @@ -7542,11 +7551,11 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, SDValue Vec0 = SVN->getOperand(VecIdx0); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec0, - DAG.getConstant(EltIdx0, SL, MVT::i32)); + DAG.getSignedConstant(EltIdx0, SL, MVT::i32)); SDValue Vec1 = SVN->getOperand(VecIdx1); SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec1, - DAG.getConstant(EltIdx1, SL, MVT::i32)); + DAG.getSignedConstant(EltIdx1, SL, MVT::i32)); Pieces.push_back(DAG.getBuildVector(PackVT, SL, {Elt0, Elt1})); } } @@ -9618,7 +9627,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait if (ST.hasSplitBarriers()) { SDValue K = - DAG.getTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32); + DAG.getSignedTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32); SDValue BarSignal = SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_SIGNAL_IMM, DL, MVT::Other, K, Op.getOperand(0)), @@ -10747,7 +10756,7 @@ SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const { Tmp = DAG.getNode(ISD::BITCAST, SL, MVT::f32, TmpCast); Quot = DAG.getNode(ISD::FADD, SL, MVT::f32, Tmp, Quot, Op->getFlags()); SDValue RDst = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, - DAG.getConstant(0, SL, MVT::i32)); + DAG.getTargetConstant(0, SL, MVT::i32)); return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, 
MVT::f16, RDst, RHS, LHS, Op->getFlags()); } @@ -11173,8 +11182,9 @@ SDValue SITargetLowering::lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const { SqrtS = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, SqrtID, SqrtX, Flags); SDValue SqrtSAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, SqrtS); - SDValue SqrtSNextDownInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt, - DAG.getConstant(-1, DL, MVT::i32)); + SDValue SqrtSNextDownInt = + DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt, + DAG.getAllOnesConstant(DL, MVT::i32)); SDValue SqrtSNextDown = DAG.getNode(ISD::BITCAST, DL, VT, SqrtSNextDownInt); SDValue NegSqrtSNextDown = @@ -11296,7 +11306,7 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const { SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2); - SDValue ScaleDownFactor = DAG.getConstant(-128, DL, MVT::i32); + SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32); SDValue ScaleDown = DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt); SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags); @@ -14689,7 +14699,7 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, (CRHS->isZero() && (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE))) return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0), - DAG.getConstant(-1, SL, MVT::i1)); + DAG.getAllOnesConstant(SL, MVT::i1)); if ((CRHS->isAllOnes() && (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) || (CRHS->isZero() && @@ -14715,7 +14725,7 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, if ((CF == CRHSVal && CC == ISD::SETEQ) || (CT == CRHSVal && CC == ISD::SETNE)) return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0), - DAG.getConstant(-1, SL, MVT::i1)); + DAG.getAllOnesConstant(SL, MVT::i1)); if ((CF == CRHSVal && CC == ISD::SETNE) || (CT == CRHSVal && CC == ISD::SETEQ)) return LHS.getOperand(0); @@ -16677,8 +16687,8 @@ SITargetLowering::getRegClassFor(MVT 
VT, bool isDivergent) const { const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false); const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); if (RC == &AMDGPU::VReg_1RegClass && !isDivergent) - return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass - : &AMDGPU::SReg_32RegClass; + return Subtarget->isWave64() ? &AMDGPU::SReg_64RegClass + : &AMDGPU::SReg_32RegClass; if (!TRI->isSGPRClass(RC) && !isDivergent) return TRI->getEquivalentSGPRClass(RC); if (TRI->isSGPRClass(RC) && isDivergent) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b7c008235fb7a..4a94d69029794 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4468,7 +4468,11 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI, // Check output modifiers return !hasModifiersSet(MI, AMDGPU::OpName::omod) && !hasModifiersSet(MI, AMDGPU::OpName::clamp) && - !hasModifiersSet(MI, AMDGPU::OpName::byte_sel); + !hasModifiersSet(MI, AMDGPU::OpName::byte_sel) && + // TODO: Can we avoid checking bound_ctrl/fi here? + // They are only used by permlane*_swap special case. + !hasModifiersSet(MI, AMDGPU::OpName::bound_ctrl) && + !hasModifiersSet(MI, AMDGPU::OpName::fi); } // Set VCC operand with all flags from \p Orig, except for setting it as @@ -7699,8 +7703,8 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, // Insert a trivial select instead of creating a copy, because a copy from // SCC would semantically mean just copying a single bit, but we may need // the result to be a vector condition mask that needs preserving. - unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64 - : AMDGPU::S_CSELECT_B32; + unsigned Opcode = + ST.isWave64() ? 
AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; auto NewSelect = BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0); NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); @@ -8712,7 +8716,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { } // IndexStride = 64 / 32. - uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 3 : 2; + uint64_t IndexStride = ST.isWave64() ? 3 : 2; Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 885f72494a8f6..f9cb6bb8d297a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">, +def isWave32 : Predicate<"Subtarget->isWave32()">, AssemblerPredicate <(all_of FeatureWavefrontSize32)>; -def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">, +def isWave64 : Predicate<"Subtarget->isWave64()">, AssemblerPredicate <(all_of FeatureWavefrontSize64)>; class AMDGPUMnemonicAlias @@ -807,32 +807,38 @@ def as_i1timm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); }]>; +def as_i1timm_zext : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + def as_i8imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8); }]>; def as_i8timm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); }]>; def as_i16imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); }]>; def as_i16timm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); + // Explicit cast, as this is used with both signed and 
unsigned immediates. + return CurDAG->getSignedTargetConstant(int16_t(N->getSExtValue()), SDLoc(N), + MVT::i16); }]>; def as_i32imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; def as_i32timm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; def as_i64imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); }]>; def cond_as_i32imm: SDNodeXForm { let Validator = "isUInt<2>"; } +def BitOp3 : CustomOperand; +def bitop3_0 : DefaultOperand; + class KImmFPOperand : ImmOperand { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_KIMM"#vt.Size; @@ -1687,7 +1696,10 @@ class getVALUDstForVT { defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16, VOPDstOperand_t16Lo128), VOPDstOperand); - RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand, + RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand, + !eq(VT.Size, 512) : VOPDstOperand, + !eq(VT.Size, 256) : VOPDstOperand, + !eq(VT.Size, 192) : VOPDstOperand, !eq(VT.Size, 128) : VOPDstOperand, !eq(VT.Size, 64) : VOPDstOperand, !eq(VT.Size, 32) : VOPDstOperand, @@ -1743,7 +1755,9 @@ class getSOPSrcForVT { // Returns the vreg register class to use for source operand given VT class getVregSrcForVT { RegisterOperand ret = - !cond(!eq(VT.Size, 128) : RegisterOperand, + !cond(!eq(VT.Size, 512) : RegisterOperand, + !eq(VT.Size, 192) : RegisterOperand, + !eq(VT.Size, 128) : RegisterOperand, !eq(VT.Size, 96) : RegisterOperand, !eq(VT.Size, 64) : RegisterOperand, !eq(VT.Size, 48) : RegisterOperand, @@ -1776,6 +1790,8 @@ class getVOP3SrcForVT { !eq(VT, v2i16) : VSrc_v2b16, !eq(VT, v4f16) : AVSrc_64, !eq(VT, v4bf16) : AVSrc_64, + !eq(VT.Size, 512) : VRegSrc_512, + !eq(VT.Size, 192) : 
VRegSrc_192, !eq(VT.Size, 128) : VRegSrc_128, !eq(VT.Size, 96) : VRegSrc_96, !eq(VT.Size, 64) : VSrc_b64, @@ -2819,6 +2835,12 @@ def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp= def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>; +def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>; +def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>; +def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>; +def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>; +def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>; +def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>; def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; @@ -2834,6 +2856,7 @@ def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>; +def VOP_I16_I32_I32_I32 : VOPProfile <[i16, i32, i32, i32]>; def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>; def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 3f211e7cbdde5..bc25d75131cc3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3727,7 +3727,7 @@ def FPPow2ToExponentXForm : SDNodeXFormgetValueAPF(); int Log2 = APF.getExactLog2Abs(); assert(Log2 != INT_MIN); - return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32); + return CurDAG->getSignedTargetConstant(Log2, SDLoc(N), MVT::i32); }]>; // Check if a floating point value is a power of 2 
floating-point diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 246ef7ad481ab..049f4af4dd2f9 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -319,7 +319,8 @@ struct SGPRSpillBuilder { SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour(), - ST.getAMDGPUDwarfFlavour()), + ST.getAMDGPUDwarfFlavour(), + /*PC=*/0, ST.getHwMode()), ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 && diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index e3baeed01841a..51fdd4211a5cf 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1249,7 +1249,9 @@ def VRegSrc_32 : SrcReg9; def VRegSrc_64 : SrcReg9; def VRegSrc_96 : SrcReg9; def VRegSrc_128: SrcReg9; +def VRegSrc_192: SrcReg9; def VRegSrc_256: SrcReg9; +def VRegSrc_512: SrcReg9; def VRegOrLdsSrc_32 : SrcReg9; // True 16 Operands diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index a60b1f28e9d34..117add324db56 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -64,6 +64,7 @@ def Write8PassMAI : SchedWrite; def Write16PassMAI : SchedWrite; def Write4PassDGEMM : SchedWrite; def Write8PassDGEMM : SchedWrite; +def Write16PassDGEMM : SchedWrite; // Scalar float instructions def WriteSFPU : SchedWrite; @@ -94,6 +95,7 @@ def SIFullSpeedModel : SISchedMachineModel; def SIQuarterSpeedModel : SISchedMachineModel; def SIDPFullSpeedModel : SISchedMachineModel; def SIDPGFX940FullSpeedModel : SISchedMachineModel; +def SIDPGFX950FullSpeedModel : SISchedMachineModel; def GFX10SpeedModel : SISchedMachineModel; def GFX11SpeedModel : SISchedMachineModel; def GFX12SpeedModel : SISchedMachineModel; @@ 
-169,6 +171,8 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes; let ReleaseAtCycles = [8] in def : HWVALUWriteRes; + let ReleaseAtCycles = [16] in + def : HWVALUWriteRes; let ReleaseAtCycles = [2] in def : HWWriteRes; @@ -201,6 +205,13 @@ def WriteCopy : SchedWriteVariant<[ SchedVar, SchedVar]>; +// Check if any matrix inputs are interpreted as f8 in an f8f6f4 mfma +// instruction. +def PredIsF8_MFMA_SCALE : SchedPredicate<[{ + TII->getNamedOperand(*MI, AMDGPU::OpName::cbsz)->getImm() <= AMDGPU::MFMAScaleFormats::FP8_E5M2 || + TII->getNamedOperand(*MI, AMDGPU::OpName::blgp)->getImm() <= AMDGPU::MFMAScaleFormats::FP8_E5M2 +}]>; + let SchedModel = SIFullSpeedModel in { defm : SICommonWriteRes; @@ -299,6 +310,58 @@ def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; } // End SchedModel = SIDPGFX940FullSpeedModel + +let SchedModel = SIDPGFX950FullSpeedModel in { +defm : SICommonWriteRes; + +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; + +def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>; +def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>; + +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X64")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>; + +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex 
"^V_MFMA_.32_32X32X32_")>; +def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>; + +def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>; +def : InstRW<[Write16PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; + +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; + + +// If either matrix format is f8, the instruction takes 2x as many +// cycles. TODO: This isn't reflected in MCA. +def WriteMFMAScale_16X16X128_F8F6F4 : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def WriteMFMAScale_32X32X64_F8F6F4 : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def : InstRW<[WriteMFMAScale_16X16X128_F8F6F4, MIMFMARead], + (instregex "^V_MFMA(_SCALE)?_.32_16X16X128_F8F6F4")>; +def : InstRW<[WriteMFMAScale_32X32X64_F8F6F4, MIMFMARead], + (instregex "^V_MFMA(_SCALE)?_.32_32X32X64_F8F6F4")>; + +} // End SchedModel = SIDPGFX950FullSpeedModel + + let SchedModel = GFX10SpeedModel in { // The latency values are 1 / (operations / cycle). 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index c7e4659b15d29..b233e89858939 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -591,6 +591,7 @@ bool isMAC(unsigned Opc) { Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 || Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 || Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || + Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi || Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 3cda173207dfb..1dd39be9e8d9c 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -380,6 +380,24 @@ def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; } +def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> { + let Outs32 = (outs DstRC:$vdst, VRegSrc_32:$src0_out); + let Outs64 = (outs DstRC64:$vdst, VRegSrc_32:$src0_out); + + let Src0RC32 = VRegSrc_32; + let Src0RC64 = VRegSrc_32; + let HasClamp = 0; + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; + let HasExtSDWA = 0; + + let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0); + let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); + let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); + let Asm64 = "$vdst, $src0$bound_ctrl$fi"; + let AsmVOP3OpSel = "$vdst, $src0$bound_ctrl$fi"; +} + // Special case because there are no true output operands. Hack vdst // to be a src operand. 
The custom inserter must add a tied implicit // def and use of the super register since there seems to be no way to @@ -767,6 +785,18 @@ let SubtargetPredicate = isGFX11Plus in { let SubtargetPredicate = HasPrngInst in defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>; +let Constraints = "$vdst = $vdst_in, $src0_out = $src0", + DisableEncoding="$vdst_in,$src0_out", + SchedRW = [Write32Bit, Write32Bit] in { +let SubtargetPredicate = HasPermlane16Swap in { +defm V_PERMLANE16_SWAP_B32 : VOP1Inst<"v_permlane16_swap_b32", VOP_PERMLANE_SWAP>; +} + +let SubtargetPredicate = HasPermlane32Swap in { +defm V_PERMLANE32_SWAP_B32 : VOP1Inst<"v_permlane32_swap_b32", VOP_PERMLANE_SWAP>; +} +} + foreach vt = Reg32Types.types in { def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)), (vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0))) @@ -1512,6 +1542,20 @@ let DecoderNamespace = "GFX9" in { } } +/// Special case of VOP1 instructions, with a VOP3 form where op_sel +/// is used for DPP operands. 
+multiclass VOP1_OpSel_Real_e32e64_gfx9 op> { + let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { + def _e32_gfx9 : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.GFX9>, + VOP1e(NAME#"_e32").Pfl>; + + def _e64_gfx9 : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>, + VOP3OpSelIsDPP_gfx9(NAME#"_e64").Pfl>; + } +} + defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let AssemblerPredicate = isGFX940Plus in @@ -1525,6 +1569,8 @@ defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>; +defm V_PERMLANE16_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x059>; +defm V_PERMLANE32_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x05a>; class MovDPP8Pattern : GCNPat < (vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)), diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 103575dc351f2..128c775619118 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -567,6 +567,12 @@ def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC { let HasClamp = 1; } +def VOP_DOT_ACC_F32_V2BF16 : VOP_DOT_ACC { + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; + let HasClamp = 1; +} + def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC { let HasExtVOP3DPP = 0; let HasSrc0Mods = 1; @@ -1182,6 +1188,9 @@ let Constraints = "$vdst = $src2", defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; let SubtargetPredicate = HasDot3Insts in defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; + + let SubtargetPredicate = HasDot13Insts in + defm V_DOT2C_F32_BF16 : VOP2Inst<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16>; } let AddedComplexity = 30 in { @@ -1191,6 +1200,12 @@ let AddedComplexity = 30 in { > { let SubtargetPredicate = HasDot5Insts; } + def : GCNPat< + (f32 (int_amdgcn_fdot2_f32_bf16 v2bf16:$src0, v2bf16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), + (f32 
(V_DOT2C_F32_BF16_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot13Insts; + } def : GCNPat< (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) @@ -2670,3 +2685,8 @@ let SubtargetPredicate = HasDot3Insts in { let DecoderNamespace = "GFX10_B" in defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; } + +let OtherPredicates = [HasDot13Insts] in { + let DecoderNamespace = "GFX950" in + defm V_DOT2C_F32_BF16 : VOP2_Real_DOT_ACC_gfx9<0x16>; +} diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 917e1b3974b46..5d4d56e8b0ad2 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -856,6 +856,162 @@ class PermlaneVarPat; +class VOP3_BITOP3_Profile : VOP3_Profile { + let HasClamp = 0; + let HasOMod = 0; + let HasModifiers = 0; + + let Ins64 = !con(getIns64.ret, + (ins bitop3_0:$bitop3)); + + let InsVOP3OpSel = !con(getInsVOP3Base.ret, + (ins bitop3_0:$bitop3, op_sel0:$op_sel)); + + let Asm64 = "$vdst, $src0, $src1, $src2$bitop3"; + let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret); +} + +class VOP3_CVT_SCALE_F1632_FP8BF8_Profile : VOP3_Profile, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 0; + let HasExtVOP3DPP = 0; + let HasOpSel = 1; + let HasOMod = 0; +} + +def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, Src2RC64:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasExtVOP3DPP = 0; + let HasOpSel = 1; + let HasOMod = 0; +} + +def VOP3_CVT_SCALE_FP4_F16BF16_Profile : VOP3_Profile, + 
VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 1; + let HasOpSel = 1; + let AsmVOP3OpSel = !subst(", $src2_modifiers", "", + getAsmVOP3OpSel<3, HasClamp, HasOMod, + HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods>.ret); + let HasExtVOP3DPP = 0; +} + +class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile : VOP3_Profile, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 0; + let HasExtVOP3DPP = 0; + let HasOpSel = 1; + let HasOMod = 0; +} + +def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 0; + let HasExtVOP3DPP = 0; + let HasOpSel = 1; + let HasOMod = 0; +} + +class VOP3_CVT_SCALEF32_PK_F864_Profile : VOP3_Profile

{ + let HasModifiers = 0; + let HasSrc0IntMods = 0; + let HasSrc1IntMods = 0; + let HasOMod = 0; + let HasOpSel = 0; + let HasClamp = 0; + let HasExtDPP = 0; + let HasExt32BitDPP = 0; + let HasExtVOP3DPP = 0; + let HasExt64BitDPP = 0; +} + +let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; + defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; + defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; + defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; + defm V_CVT_SCALEF32_PK_F16_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; +} + +let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; + defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; + defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; + defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", 
VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; + defm V_CVT_SCALEF32_PK_F16_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f16_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF16_BF8 : VOP3Inst<"v_cvt_scalef32_pk_bf16_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; +} + +let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; + defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + + // These instructions have non-standard use of op_sel. In particular they are + // using op_sel bits 2 and 3 while only having two sources. + let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in { + defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f16", VOP3_CVT_SCALE_FP4_F16BF16_Profile>; + defm V_CVT_SCALEF32_PK_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_bf16", VOP3_CVT_SCALE_FP4_F16BF16_Profile>; + } +} + +let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile>; + defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile>; + defm V_CVT_SCALEF32_PK32_F16_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_f16_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile>; + defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile>; + defm V_CVT_SCALEF32_PK32_F16_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_f16_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile>; + defm V_CVT_SCALEF32_PK32_BF16_BF6 : 
VOP3Inst<"v_cvt_scalef32_pk32_bf16_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile>; +} + +let SubtargetPredicate = HasF16BF16ToFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_PK32_FP6_F16 : VOP3Inst<"v_cvt_scalef32_pk32_fp6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile, int_amdgcn_cvt_scalef32_pk32_fp6_f16>; + defm V_CVT_SCALEF32_PK32_BF6_F16 : VOP3Inst<"v_cvt_scalef32_pk32_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile, int_amdgcn_cvt_scalef32_pk32_bf6_f16>; + defm V_CVT_SCALEF32_PK32_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile, int_amdgcn_cvt_scalef32_pk32_fp6_bf16>; + defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile, int_amdgcn_cvt_scalef32_pk32_bf6_bf16>; +} + +let SubtargetPredicate = HasGFX950Insts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_2XPK16_FP6_F32 : VOP3Inst<"v_cvt_scalef32_2xpk16_fp6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile, int_amdgcn_cvt_scalef32_2xpk16_fp6_f32>; + defm V_CVT_SCALEF32_2XPK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_2xpk16_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile, int_amdgcn_cvt_scalef32_2xpk16_bf6_f32>; +} + let SubtargetPredicate = isGFX10Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile>; @@ -908,6 +1064,16 @@ let SubtargetPredicate = isGFX12Plus in { } // End SubtargetPredicate = isGFX12Plus +let SubtargetPredicate = HasBitOp3Insts in { + let isReMaterializable = 1 in { + defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", + VOP3_BITOP3_Profile>, + VOP3_OPSEL>>; + defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32", + VOP3_BITOP3_Profile, VOP3_REGULAR>>; + } +} // End SubtargetPredicate = HasBitOp3Insts + class DivFmasPat : GCNPat< (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), @@ -1023,6 +1189,11 @@ let SubtargetPredicate = HasPseudoScalarTrans in { def : PseudoScalarPatF16; } +let 
SubtargetPredicate = HasAshrPkInsts, isReMaterializable = 1 in { + defm V_ASHR_PK_I8_I32 : VOP3Inst<"v_ashr_pk_i8_i32", VOP3_Profile, int_amdgcn_ashr_pk_i8_i32>; + defm V_ASHR_PK_U8_I32 : VOP3Inst<"v_ashr_pk_u8_i32", VOP3_Profile, int_amdgcn_ashr_pk_u8_i32>; +} // End SubtargetPredicate = HasAshrPkInsts, isReMaterializable = 1 + //===----------------------------------------------------------------------===// // Integer Clamp Patterns //===----------------------------------------------------------------------===// @@ -1606,6 +1777,23 @@ multiclass VOP3_Real_gfx9 op, string AsmName> { } } +multiclass VOP3_Real_BITOP3_gfx9 op, string AsmName, bit isSingle = 0> { + defvar ps = !cast(NAME#"_e64"); + let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + def _gfx9 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>, + VOP3e_vi (NAME#"_e64").Pfl> { + let AsmString = AsmName # ps.AsmOperands; + bits<8> bitop3; + let Inst{60-59} = bitop3{7-6}; + let Inst{10-8} = bitop3{5-3}; + let Inst{63-61} = bitop3{2-0}; + let Inst{11} = !if(ps.Pfl.HasOpSel, src0_modifiers{2}, 0); + let Inst{12} = !if(ps.Pfl.HasOpSel, src1_modifiers{2}, 0); + let Inst{13} = !if(ps.Pfl.HasOpSel, src2_modifiers{2}, 0); + let Inst{14} = !if(ps.Pfl.HasOpSel, src0_modifiers{3}, 0); + } + } +} } // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; @@ -1748,3 +1936,58 @@ defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>; defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; + +defm V_MINIMUM3_F32 : VOP3_Real_vi <0x2a8>; +defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>; + +defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">; +defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">; +let OtherPredicates = [HasFP8ConversionScaleInsts] in { +defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>; +defm 
V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>; +defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>; +defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>; +defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>; +defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>; +defm V_CVT_SCALEF32_PK_F16_FP8 : VOP3OpSel_Real_gfx9<0x248>; +defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3OpSel_Real_gfx9<0x269>; +} +let OtherPredicates = [HasBF8ConversionScaleInsts] in { +defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>; +defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>; +defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>; +defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>; +defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>; +defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>; +defm V_CVT_SCALEF32_PK_F16_BF8 : VOP3OpSel_Real_gfx9<0x249>; +defm V_CVT_SCALEF32_PK_BF16_BF8 : VOP3OpSel_Real_gfx9<0x26a>; +} +let OtherPredicates = [HasFP4ConversionScaleInsts] in { +defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>; +defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>; +defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>; +defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>; +defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>; +defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>; +} +let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in { +defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">; +defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, "v_cvt_scalef32_pk32_f32_bf6">; +defm V_CVT_SCALEF32_PK32_F16_FP6 : VOP3_Real_gfx9<0x260, "v_cvt_scalef32_pk32_f16_fp6">; +defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3_Real_gfx9<0x261, "v_cvt_scalef32_pk32_bf16_fp6">; +defm V_CVT_SCALEF32_PK32_F16_BF6 : VOP3_Real_gfx9<0x262, "v_cvt_scalef32_pk32_f16_bf6">; +defm 
V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3_Real_gfx9<0x263, "v_cvt_scalef32_pk32_bf16_bf6">; +} + +let OtherPredicates = [HasF16BF16ToFP6BF6ConversionScaleInsts] in { +defm V_CVT_SCALEF32_PK32_FP6_F16 : VOP3_Real_gfx9<0x258, "v_cvt_scalef32_pk32_fp6_f16">; +defm V_CVT_SCALEF32_PK32_FP6_BF16 : VOP3_Real_gfx9<0x259, "v_cvt_scalef32_pk32_fp6_bf16">; +defm V_CVT_SCALEF32_PK32_BF6_F16 : VOP3_Real_gfx9<0x25a, "v_cvt_scalef32_pk32_bf6_f16">; +defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3_Real_gfx9<0x25b, "v_cvt_scalef32_pk32_bf6_bf16">; +} + +defm V_ASHR_PK_I8_I32 : VOP3OpSel_Real_gfx9 <0x265>; +defm V_ASHR_PK_U8_I32 : VOP3OpSel_Real_gfx9 <0x266>; + +defm V_CVT_SCALEF32_2XPK16_FP6_F32 : VOP3_Real_gfx9<0x252, "v_cvt_scalef32_2xpk16_fp6_f32">; +defm V_CVT_SCALEF32_2XPK16_BF6_F32 : VOP3_Real_gfx9<0x253, "v_cvt_scalef32_2xpk16_bf6_f32">; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 5d8dc5ccd18e5..ae5a6581a3b20 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -144,6 +144,11 @@ def : VOP3PSatPat; def : VOP3PSatPat; } // End SubtargetPredicate = HasVOP3PInsts +let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in { +defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile>; +defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile>; +} + // TODO: Make sure we're doing the right thing with denormals. Note // that FMA and MAD will differ. 
multiclass MadFmaMixPats; -} // End SubtargetPredicate = HasDot9Insts +} // End SubtargetPredicate = HasDot12Insts } // End let IsDOT = 1 @@ -2050,6 +2055,9 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>; defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>; defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>; +defm V_PK_MINIMUM3_F16 : VOP3P_Real_vi <0x1b>; +defm V_PK_MAXIMUM3_F16 : VOP3P_Real_vi <0x1c>; + defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>; defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>; defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>; @@ -2118,6 +2126,7 @@ defm V_MFMA_F32_16X16X128_F8F6F4 : VOP3P_Real_MFMA_F8F6F4_gfx950_mc <0x2d, "v_mf defm V_MFMA_SCALE_F32_16X16X128_F8F6F4 : VOP3PX_Real_ScaledMFMA_F8F6F4_mc <0x2d>; defm V_MFMA_F32_32X32X64_F8F6F4 : VOP3P_Real_MFMA_F8F6F4_gfx950_mc <0x2e, "v_mfma_f32_32x32x64_f8f6f4">; defm V_MFMA_SCALE_F32_32X32X64_F8F6F4 : VOP3PX_Real_ScaledMFMA_F8F6F4_mc <0x2e>; +defm V_DOT2_F32_BF16 : VOP3P_Real_vi<0x1a>; defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">; defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index eb9d00972468c..0e19696a32f86 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -324,6 +324,18 @@ class VOP3OpSel_gfx9 op, VOPProfile P> : VOP3e_vi { let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0); } +// Special case for v_permlane16_swap_b32/v_permlane32_swap_b32 +// op_sel[0]/op_sel[1] are treated as bound_ctrl and fi dpp operands. 
+class VOP3OpSelIsDPP_gfx9 op, VOPProfile P> : VOP3e_vi { + bits<1> fi; + bits<1> bound_ctrl; + + // OPSEL[0] specifies FI + let Inst{11} = fi; + // OPSEL[1] specifies BOUND_CTRL + let Inst{12} = bound_ctrl; +} + class VOP3OpSel_gfx10 op, VOPProfile p> : VOP3e_gfx10 { let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0); let Inst{12} = !if(p.HasSrc1, src1_modifiers{2}, 0); @@ -1443,6 +1455,7 @@ def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; def VOP3_MAI : VOP3Features<0, 0, 0, 1>; +def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>; // Packed is misleading, but it enables the appropriate op_sel // modifiers. diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.td b/llvm/lib/Target/ARC/ARCInstrInfo.td index 00f5f358354a9..f26b49119caba 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.td +++ b/llvm/lib/Target/ARC/ARCInstrInfo.td @@ -55,7 +55,7 @@ def ARCcmp : SDNode<"ARCISD::CMP", SDT_ARCcmptst, [SDNPOutGlue]>; def ARCcmov : SDNode<"ARCISD::CMOV", SDT_ARCcmov, [SDNPInGlue]>; // Conditional Branch -def ARCbrcc : SDNode<"ARCISD::BRcc", SDT_ARCbrcc, [SDNPHasChain, SDNPInGlue]>; +def ARCbrcc : SDNode<"ARCISD::BRcc", SDT_ARCbrcc, [SDNPHasChain]>; // Direct Call def ARCBranchLink : SDNode<"ARCISD::BL",SDT_ARCBranchLink, diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 1737ec7b67199..07c79f6f227b0 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -412,24 +412,20 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) { // Optimize left rotation 3 bits to swap then right rotation 1 bit. 
Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim); - Victim = - DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim); ShiftAmount = 0; } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) { // Optimize right rotation 3 bits to swap then left rotation 1 bit. Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim); - Victim = - DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim); ShiftAmount = 0; } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) { // Optimize left rotation 7 bits to right rotation 1 bit. - Victim = - DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim); ShiftAmount = 0; } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) { // Optimize right rotation 7 bits to left rotation 1 bit. - Victim = - DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim); ShiftAmount = 0; } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) && ShiftAmount >= 4) { diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index e912878e9b23c..3973cd30de1ec 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -69,9 +69,9 @@ def AVRasrbn : SDNode<"AVRISD::ASRBN", SDTIntBinOp>; def AVRlslwn : SDNode<"AVRISD::LSLWN", SDTIntBinOp>; def AVRlsrwn : SDNode<"AVRISD::LSRWN", SDTIntBinOp>; def AVRasrwn : SDNode<"AVRISD::ASRWN", SDTIntBinOp>; -def AVRlslw : SDNode<"AVRISD::LSLW", SDTIntShiftDOp>; -def AVRlsrw : SDNode<"AVRISD::LSRW", SDTIntShiftDOp>; -def AVRasrw : SDNode<"AVRISD::ASRW", SDTIntShiftDOp>; +def AVRlslw : SDNode<"AVRISD::LSLW", SDTIntShiftPairOp>; +def AVRlsrw : SDNode<"AVRISD::LSRW", SDTIntShiftPairOp>; +def AVRasrw : SDNode<"AVRISD::ASRW", SDTIntShiftPairOp>; // Pseudo shift nodes for 
non-constant shift amounts. def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>; diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 2c11373504e8c..aaf994b23cf3c 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -78,13 +78,13 @@ bool DXContainerGlobals::runOnModule(Module &M) { } GlobalVariable *DXContainerGlobals::getFeatureFlags(Module &M) { - const uint64_t FeatureFlags = - static_cast(getAnalysis() - .getShaderFlags() - .getFeatureFlags()); + uint64_t CombinedFeatureFlags = getAnalysis() + .getShaderFlags() + .getCombinedFlags() + .getFeatureFlags(); Constant *FeatureFlagsConstant = - ConstantInt::get(M.getContext(), APInt(64, FeatureFlags)); + ConstantInt::get(M.getContext(), APInt(64, CombinedFeatureFlags)); return buildContainerGlobal(M, FeatureFlagsConstant, "dx.sfi0", "SFI0"); } diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 9f124394363a3..b5cc209493ed1 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -236,9 +236,14 @@ class OpLowerer { dxil::ResourceInfo &RI = *It; const auto &Binding = RI.getBinding(); + Value *IndexOp = CI->getArgOperand(3); + if (Binding.LowerBound != 0) + IndexOp = IRB.CreateAdd(IndexOp, + ConstantInt::get(Int32Ty, Binding.LowerBound)); + std::array Args{ ConstantInt::get(Int8Ty, llvm::to_underlying(RI.getResourceClass())), - ConstantInt::get(Int32Ty, Binding.RecordID), CI->getArgOperand(3), + ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp, CI->getArgOperand(4)}; Expected OpCall = OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName()); @@ -257,6 +262,7 @@ class OpLowerer { [[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) { IRBuilder<> &IRB = OpBuilder.getIRB(); + Type *Int32Ty = IRB.getInt32Ty(); return replaceFunction(F, [&](CallInst *CI) -> Error { 
IRB.SetInsertPoint(CI); @@ -266,6 +272,12 @@ class OpLowerer { dxil::ResourceInfo &RI = *It; const auto &Binding = RI.getBinding(); + + Value *IndexOp = CI->getArgOperand(3); + if (Binding.LowerBound != 0) + IndexOp = IRB.CreateAdd(IndexOp, + ConstantInt::get(Int32Ty, Binding.LowerBound)); + std::pair Props = RI.getAnnotateProps(); // For `CreateHandleFromBinding` we need the upper bound rather than the @@ -276,8 +288,7 @@ class OpLowerer { : Binding.LowerBound + Binding.Size - 1; Constant *ResBind = OpBuilder.getResBind( Binding.LowerBound, UpperBound, Binding.Space, RI.getResourceClass()); - std::array BindArgs{ResBind, CI->getArgOperand(3), - CI->getArgOperand(4)}; + std::array BindArgs{ResBind, IndexOp, CI->getArgOperand(4)}; Expected OpBind = OpBuilder.tryCreateOp( OpCode::CreateHandleFromBinding, BindArgs, CI->getName()); if (Error E = OpBind.takeError()) @@ -647,7 +658,7 @@ class OpLowerer { case Intrinsic::dx_typedBufferStore: HasErrors |= lowerTypedBufferStore(F); break; - case Intrinsic::dx_updateCounter: + case Intrinsic::dx_bufferUpdateCounter: HasErrors |= lowerUpdateCounter(F); break; // TODO: this can be removed when diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp index 9fa137b4c025e..d6917dce98abd 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp @@ -13,36 +13,54 @@ #include "DXILShaderFlags.h" #include "DirectX.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::dxil; -static void updateFlags(ComputedShaderFlags &Flags, const Instruction &I) { - Type *Ty = I.getType(); - if (Ty->isDoubleTy()) { - Flags.Doubles = true; +static void updateFunctionFlags(ComputedShaderFlags &CSF, + const Instruction &I) { + if (!CSF.Doubles) + CSF.Doubles = I.getType()->isDoubleTy(); + + if 
(!CSF.Doubles) { + for (Value *Op : I.operands()) + CSF.Doubles |= Op->getType()->isDoubleTy(); + } + if (CSF.Doubles) { switch (I.getOpcode()) { case Instruction::FDiv: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: - Flags.DX11_1_DoubleExtensions = true; + // TODO: To be set if I is a call to DXIL intrinsic DXIL::Opcode::Fma + // https://github.com/llvm/llvm-project/issues/114554 + CSF.DX11_1_DoubleExtensions = true; break; } } } -ComputedShaderFlags ComputedShaderFlags::computeFlags(Module &M) { - ComputedShaderFlags Flags; - for (const auto &F : M) +void ModuleShaderFlags::initialize(const Module &M) { + // Collect shader flags for each of the functions + for (const auto &F : M.getFunctionList()) { + if (F.isDeclaration()) + continue; + ComputedShaderFlags CSF; for (const auto &BB : F) for (const auto &I : BB) - updateFlags(Flags, I); - return Flags; + updateFunctionFlags(CSF, I); + // Insert shader flag mask for function F + FunctionFlags.push_back({&F, CSF}); + // Update combined shader flags mask + CombinedSFMask.merge(CSF); + } + llvm::sort(FunctionFlags); } void ComputedShaderFlags::print(raw_ostream &OS) const { @@ -63,20 +81,58 @@ void ComputedShaderFlags::print(raw_ostream &OS) const { OS << ";\n"; } +/// Return the shader flags mask of the specified function Func. +const ComputedShaderFlags & +ModuleShaderFlags::getFunctionFlags(const Function *Func) const { + const auto Iter = llvm::lower_bound( + FunctionFlags, Func, + [](const std::pair FSM, + const Function *FindFunc) { return (FSM.first < FindFunc); }); + assert((Iter != FunctionFlags.end() && Iter->first == Func) && + "No Shader Flags Mask exists for function"); + return Iter->second; +} + +//===----------------------------------------------------------------------===// +// ShaderFlagsAnalysis and ShaderFlagsAnalysisPrinterPass + +// Provide an explicit template instantiation for the static ID. 
AnalysisKey ShaderFlagsAnalysis::Key; -ComputedShaderFlags ShaderFlagsAnalysis::run(Module &M, - ModuleAnalysisManager &AM) { - return ComputedShaderFlags::computeFlags(M); +ModuleShaderFlags ShaderFlagsAnalysis::run(Module &M, + ModuleAnalysisManager &AM) { + ModuleShaderFlags MSFI; + MSFI.initialize(M); + return MSFI; } PreservedAnalyses ShaderFlagsAnalysisPrinter::run(Module &M, ModuleAnalysisManager &AM) { - ComputedShaderFlags Flags = AM.getResult(M); - Flags.print(OS); + const ModuleShaderFlags &FlagsInfo = AM.getResult(M); + // Print description of combined shader flags for all module functions + OS << "; Combined Shader Flags for Module\n"; + FlagsInfo.getCombinedFlags().print(OS); + // Print shader flags mask for each of the module functions + OS << "; Shader Flags for Module Functions\n"; + for (const auto &F : M.getFunctionList()) { + if (F.isDeclaration()) + continue; + auto SFMask = FlagsInfo.getFunctionFlags(&F); + OS << formatv("; Function {0} : {1:x8}\n;\n", F.getName(), + (uint64_t)(SFMask)); + } + return PreservedAnalyses::all(); } +//===----------------------------------------------------------------------===// +// ShaderFlagsAnalysis and ShaderFlagsAnalysisPrinterPass + +bool ShaderFlagsAnalysisWrapper::runOnModule(Module &M) { + MSFI.initialize(M); + return false; +} + char ShaderFlagsAnalysisWrapper::ID = 0; INITIALIZE_PASS(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis", diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h index 1df7d27de13d3..2d60137f8b191 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.h +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h @@ -14,12 +14,14 @@ #ifndef LLVM_TARGET_DIRECTX_DXILSHADERFLAGS_H #define LLVM_TARGET_DIRECTX_DXILSHADERFLAGS_H +#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include +#include namespace llvm { 
class Module; @@ -43,15 +45,23 @@ struct ComputedShaderFlags { constexpr uint64_t getMask(int Bit) const { return Bit != -1 ? 1ull << Bit : 0; } + + uint64_t getModuleFlags() const { + uint64_t ModuleFlags = 0; +#define DXIL_MODULE_FLAG(DxilModuleBit, FlagName, Str) \ + ModuleFlags |= FlagName ? getMask(DxilModuleBit) : 0ull; +#include "llvm/BinaryFormat/DXContainerConstants.def" + return ModuleFlags; + } + operator uint64_t() const { - uint64_t FlagValue = 0; + uint64_t FlagValue = getModuleFlags(); #define SHADER_FEATURE_FLAG(FeatureBit, DxilModuleBit, FlagName, Str) \ FlagValue |= FlagName ? getMask(DxilModuleBit) : 0ull; -#define DXIL_MODULE_FLAG(DxilModuleBit, FlagName, Str) \ - FlagValue |= FlagName ? getMask(DxilModuleBit) : 0ull; #include "llvm/BinaryFormat/DXContainerConstants.def" return FlagValue; } + uint64_t getFeatureFlags() const { uint64_t FeatureFlags = 0; #define SHADER_FEATURE_FLAG(FeatureBit, DxilModuleBit, FlagName, Str) \ @@ -60,11 +70,33 @@ struct ComputedShaderFlags { return FeatureFlags; } - static ComputedShaderFlags computeFlags(Module &M); + void merge(const uint64_t IVal) { +#define SHADER_FEATURE_FLAG(FeatureBit, DxilModuleBit, FlagName, Str) \ + FlagName |= (IVal & getMask(DxilModuleBit)); +#define DXIL_MODULE_FLAG(DxilModuleBit, FlagName, Str) \ + FlagName |= (IVal & getMask(DxilModuleBit)); +#include "llvm/BinaryFormat/DXContainerConstants.def" + return; + } + void print(raw_ostream &OS = dbgs()) const; LLVM_DUMP_METHOD void dump() const { print(); } }; +struct ModuleShaderFlags { + void initialize(const Module &); + const ComputedShaderFlags &getFunctionFlags(const Function *) const; + const ComputedShaderFlags &getCombinedFlags() const { return CombinedSFMask; } + +private: + /// Vector of sorted Function-Shader Flag mask pairs representing properties + /// of each of the functions in the module. 
Shader Flags of each function + /// represent both module-level and function-level flags + SmallVector> FunctionFlags; + /// Combined Shader Flag Mask of all functions of the module + ComputedShaderFlags CombinedSFMask{}; +}; + class ShaderFlagsAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; @@ -72,9 +104,9 @@ class ShaderFlagsAnalysis : public AnalysisInfoMixin { public: ShaderFlagsAnalysis() = default; - using Result = ComputedShaderFlags; + using Result = ModuleShaderFlags; - ComputedShaderFlags run(Module &M, ModuleAnalysisManager &AM); + ModuleShaderFlags run(Module &M, ModuleAnalysisManager &AM); }; /// Printer pass for ShaderFlagsAnalysis results. @@ -92,19 +124,16 @@ class ShaderFlagsAnalysisPrinter /// This is required because the passes that will depend on this are codegen /// passes which run through the legacy pass manager. class ShaderFlagsAnalysisWrapper : public ModulePass { - ComputedShaderFlags Flags; + ModuleShaderFlags MSFI; public: static char ID; ShaderFlagsAnalysisWrapper() : ModulePass(ID) {} - const ComputedShaderFlags &getShaderFlags() { return Flags; } + const ModuleShaderFlags &getShaderFlags() { return MSFI; } - bool runOnModule(Module &M) override { - Flags = ComputedShaderFlags::computeFlags(M); - return false; - } + bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index be370e10df694..4ba10d123e8d2 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -286,11 +286,6 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD, MDTuple *Properties = nullptr; if (ShaderFlags != 0) { SmallVector MDVals; - // FIXME: ShaderFlagsAnalysis pass needs to collect and provide - // ShaderFlags for each entry function. 
Currently, ShaderFlags value - // provided by ShaderFlagsAnalysis pass is created by walking *all* the - // function instructions of the module. Is it is correct to use this value - // for metadata of the empty library entry? MDVals.append( getTagValueAsMetadata(EntryPropsTag::ShaderFlags, ShaderFlags, Ctx)); Properties = MDNode::get(Ctx, MDVals); @@ -302,7 +297,7 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD, static void translateMetadata(Module &M, const DXILResourceMap &DRM, const Resources &MDResources, - const ComputedShaderFlags &ShaderFlags, + const ModuleShaderFlags &ShaderFlags, const ModuleMetadataInfo &MMDI) { LLVMContext &Ctx = M.getContext(); IRBuilder<> IRB(Ctx); @@ -318,23 +313,27 @@ static void translateMetadata(Module &M, const DXILResourceMap &DRM, // See https://github.com/llvm/llvm-project/issues/57928 MDTuple *Signatures = nullptr; - if (MMDI.ShaderProfile == Triple::EnvironmentType::Library) + if (MMDI.ShaderProfile == Triple::EnvironmentType::Library) { + // Get the combined shader flag mask of all functions in the library to be + // used as shader flags mask value associated with top-level library entry + // metadata. + uint64_t CombinedMask = ShaderFlags.getCombinedFlags(); EntryFnMDNodes.emplace_back( - emitTopLevelLibraryNode(M, ResourceMD, ShaderFlags)); - else if (MMDI.EntryPropertyVec.size() > 1) { + emitTopLevelLibraryNode(M, ResourceMD, CombinedMask)); + } else if (MMDI.EntryPropertyVec.size() > 1) { M.getContext().diagnose(DiagnosticInfoTranslateMD( M, "Non-library shader: One and only one entry expected")); } for (const EntryProperties &EntryProp : MMDI.EntryPropertyVec) { - // FIXME: ShaderFlagsAnalysis pass needs to collect and provide - // ShaderFlags for each entry function. For now, assume shader flags value - // of entry functions being compiled for lib_* shader profile viz., - // EntryPro.Entry is 0. - uint64_t EntryShaderFlags = - (MMDI.ShaderProfile == Triple::EnvironmentType::Library) ? 
0 - : ShaderFlags; + const ComputedShaderFlags &EntrySFMask = + ShaderFlags.getFunctionFlags(EntryProp.Entry); + + // If ShaderProfile is Library, mask is already consolidated in the + // top-level library node. Hence it is not emitted. + uint64_t EntryShaderFlags = 0; if (MMDI.ShaderProfile != Triple::EnvironmentType::Library) { + EntryShaderFlags = EntrySFMask; if (EntryProp.ShaderStage != MMDI.ShaderProfile) { M.getContext().diagnose(DiagnosticInfoTranslateMD( M, @@ -361,8 +360,7 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M, ModuleAnalysisManager &MAM) { const DXILResourceMap &DRM = MAM.getResult(M); const dxil::Resources &MDResources = MAM.getResult(M); - const ComputedShaderFlags &ShaderFlags = - MAM.getResult(M); + const ModuleShaderFlags &ShaderFlags = MAM.getResult(M); const dxil::ModuleMetadataInfo MMDI = MAM.getResult(M); translateMetadata(M, DRM, MDResources, ShaderFlags, MMDI); @@ -393,7 +391,7 @@ class DXILTranslateMetadataLegacy : public ModulePass { getAnalysis().getResourceMap(); const dxil::Resources &MDResources = getAnalysis().getDXILResource(); - const ComputedShaderFlags &ShaderFlags = + const ModuleShaderFlags &ShaderFlags = getAnalysis().getShaderFlags(); dxil::ModuleMetadataInfo MMDI = getAnalysis().getModuleMetadata(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index ab0f41343ce21..816e063f8dbbe 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1575,9 +1575,10 @@ HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed, unsigned ResWidth = ResTy.getSizeInBits(); if (InpTy.isFloatingPoint()) { - return InpWidth < ResWidth ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV) - : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV, - getZero(dl, MVT::i32, DAG)); + return InpWidth < ResWidth + ? 
DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV) + : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV, + DAG.getTargetConstant(0, dl, MVT::i32)); } assert(InpTy.isInteger()); diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp index f496085c88356..fcad5f7460bb2 100644 --- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp +++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp @@ -289,17 +289,17 @@ class M68kDAGToDAGISel : public SelectionDAGISel { /// Return a target constant with the specified value of type i8. inline SDValue getI8Imm(int64_t Imm, const SDLoc &DL) { - return CurDAG->getTargetConstant(Imm, DL, MVT::i8); + return CurDAG->getSignedTargetConstant(Imm, DL, MVT::i8); } /// Return a target constant with the specified value of type i8. inline SDValue getI16Imm(int64_t Imm, const SDLoc &DL) { - return CurDAG->getTargetConstant(Imm, DL, MVT::i16); + return CurDAG->getSignedTargetConstant(Imm, DL, MVT::i16); } /// Return a target constant with the specified value, of type i32. 
inline SDValue getI32Imm(int64_t Imm, const SDLoc &DL) { - return CurDAG->getTargetConstant(Imm, DL, MVT::i32); + return CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32); } /// Return a reference to the TargetInstrInfo, casted to the target-specific diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index 8caa88f988b84..ff966baecf27d 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -2947,7 +2947,7 @@ void M68kTargetLowering::LowerAsmOperandForConstraint(SDValue Op, llvm_unreachable("Unhandled constant constraint"); } - Result = DAG.getTargetConstant(Val, SDLoc(Op), Op.getValueType()); + Result = DAG.getSignedTargetConstant(Val, SDLoc(Op), Op.getValueType()); break; } default: @@ -2983,7 +2983,7 @@ void M68kTargetLowering::LowerAsmOperandForConstraint(SDValue Op, llvm_unreachable("Unhandled constant constraint"); } - Result = DAG.getTargetConstant(Val, SDLoc(Op), Op.getValueType()); + Result = DAG.getSignedTargetConstant(Val, SDLoc(Op), Op.getValueType()); break; } default: @@ -3415,7 +3415,7 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Result = DAG.getNode(ISD::SUB, DL, VT, SP, Size); // Value if (Align > StackAlign) Result = DAG.getNode(ISD::AND, DL, VT, Result, - DAG.getConstant(-(uint64_t)Align, DL, VT)); + DAG.getSignedConstant(-(uint64_t)Align, DL, VT)); Chain = DAG.getCopyToReg(Chain, DL, SPReg, Result); // Output chain } @@ -3442,7 +3442,7 @@ SDValue M68kTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue Zero = DAG.getConstant(0, DL, VT); SDValue One = DAG.getConstant(1, DL, VT); - SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + SDValue MinusRegisterSize = DAG.getSignedConstant(-32, DL, VT); SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); SDValue ShamtMinusRegisterSize = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); @@ -3494,7 +3494,7 @@ SDValue 
M68kTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, SDValue Zero = DAG.getConstant(0, DL, VT); SDValue One = DAG.getConstant(1, DL, VT); - SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + SDValue MinusRegisterSize = DAG.getSignedConstant(-32, DL, VT); SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); SDValue ShamtMinusRegisterSize = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b9003ddbd3187..62647b3128518 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2786,7 +2786,7 @@ SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op, return DAG.getNode( ISD::FP_ROUND, Loc, MVT::bf16, DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)), - DAG.getIntPtrConstant(0, Loc)); + DAG.getIntPtrConstant(0, Loc, /*isTarget=*/true)); } // Everything else is considered legal. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 87a4ad3752c64..f4d3668726164 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8963,9 +8963,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { if (IsStrict) - FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, - DAG.getVTList(MVT::f32, MVT::Other), - {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); + FP = DAG.getNode( + ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other), + {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, + Flags); else FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); @@ -9044,9 +9045,9 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Chain = FP.getValue(1); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { if (IsStrict) - FP = 
DAG.getNode(ISD::STRICT_FP_ROUND, dl, - DAG.getVTList(MVT::f32, MVT::Other), - {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); + FP = DAG.getNode( + ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other), + {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags); else FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 4e9090307e2f8..a9294e76f8763 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -491,11 +491,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) // FP Operations - getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG, - G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM}) + getActionDefinitionsBuilder( + {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM}) .legalFor(ST.hasStdExtF(), {s32}) .legalFor(ST.hasStdExtD(), {s64}) - .legalFor(ST.hasStdExtZfh(), {s16}); + .legalFor(ST.hasStdExtZfh(), {s16}) + .libcallFor({s32, s64}); + + getActionDefinitionsBuilder({G_FNEG, G_FABS}) + .legalFor(ST.hasStdExtF(), {s32}) + .legalFor(ST.hasStdExtD(), {s64}) + .legalFor(ST.hasStdExtZfh(), {s16}) + .lowerFor({s32, s64}); getActionDefinitionsBuilder(G_FREM) .libcallFor({s32, s64}) @@ -506,7 +513,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .legalFor(ST.hasStdExtF(), {{s32, s32}}) .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}}) .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}}) - .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}}); + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}}) + .lower(); // FIXME: Use Zfhmin. 
getActionDefinitionsBuilder(G_FPTRUNC) @@ -528,7 +536,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder(G_IS_FPCLASS) .customFor(ST.hasStdExtF(), {{s1, s32}}) .customFor(ST.hasStdExtD(), {{s1, s64}}) - .customFor(ST.hasStdExtZfh(), {{s1, s16}}); + .customFor(ST.hasStdExtZfh(), {{s1, s16}}) + .lowerFor({{s1, s32}, {s1, s64}}); getActionDefinitionsBuilder(G_FCONSTANT) .legalFor(ST.hasStdExtF(), {s32}) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f0bc74e331db4..2da32fece061b 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -498,36 +498,6 @@ getPushOrLibCallsSavedInfo(const MachineFunction &MF, return PushOrLibCallsCSI; } -void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, int64_t Amount, - MachineInstr::MIFlag Flag) const { - assert(Amount != 0 && "Did not need to adjust stack pointer for RVV."); - - // Optimize compile time offset case - StackOffset Offset = StackOffset::getScalable(Amount); - if (auto VLEN = STI.getRealVLen()) { - // 1. Multiply the number of v-slots by the (constant) length of register - const int64_t VLENB = *VLEN / 8; - assert(Amount % 8 == 0 && - "Reserve the stack by the multiple of one vector size."); - const int64_t NumOfVReg = Amount / 8; - const int64_t FixedOffset = NumOfVReg * VLENB; - if (!isInt<32>(FixedOffset)) { - report_fatal_error( - "Frame size outside of the signed 32-bit range not supported"); - } - Offset = StackOffset::getFixed(FixedOffset); - } - - const RISCVRegisterInfo &RI = *STI.getRegisterInfo(); - // We must keep the stack pointer aligned through any intermediate - // updates. 
- RI.adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, - Flag, getStackAlign()); -} - static void appendScalableVectorExpression(const TargetRegisterInfo &TRI, SmallVectorImpl &Expr, int FixedOffset, int ScalableOffset, @@ -610,6 +580,25 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI, Comment.str()); } +void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + StackOffset Offset, bool EmitCFI, + unsigned CFIIndex) const { + DebugLoc DL; + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + const RISCVInstrInfo *TII = STI.getInstrInfo(); + + RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup, + getStackAlign()); + + if (EmitCFI) { + // Emit ".cfi_def_cfa_offset StackSize" + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } +} + void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -726,16 +715,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize != 0) { // Allocate space on the stack if necessary. 
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, - StackOffset::getFixed(-StackSize), MachineInstr::FrameSetup, - getStackAlign()); - - // Emit ".cfi_def_cfa_offset RealStackSize" unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); + allocateStack(MBB, MBBI, StackOffset::getFixed(-StackSize), + /*EmitCFI=*/ true, CFIIndex); } // The frame pointer is callee-saved, and code has been generated for us to @@ -776,25 +759,22 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); - RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, - StackOffset::getFixed(-SecondSPAdjustAmount), - MachineInstr::FrameSetup, getStackAlign()); // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0", // don't emit an sp-based .cfi_def_cfa_offset - if (!hasFP(MF)) { - // Emit ".cfi_def_cfa_offset StackSize" - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( - nullptr, getStackSizeWithRVVPadding(MF))); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); - } + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( + nullptr, getStackSizeWithRVVPadding(MF))); + allocateStack(MBB, MBBI, StackOffset::getFixed(-SecondSPAdjustAmount), + !hasFP(MF), CFIIndex); } if (RVVStackSize) { - adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize, - MachineInstr::FrameSetup); + // We must keep the stack pointer aligned through any intermediate + // updates. 
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, + StackOffset::getScalable(-RVVStackSize), + MachineInstr::FrameSetup, getStackAlign()); + if (!hasFP(MF)) { // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb". unsigned CFIIndex = MF.addFrameInst(createDefCFAExpression( @@ -919,8 +899,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, // If RestoreSPFromFP the stack pointer will be restored using the frame // pointer value. if (!RestoreSPFromFP) - adjustStackForRVV(MF, MBB, LastFrameDestroy, DL, RVVStackSize, - MachineInstr::FrameDestroy); + RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, + StackOffset::getScalable(RVVStackSize), + MachineInstr::FrameDestroy, getStackAlign()); if (!hasFP(MF)) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( @@ -1752,8 +1733,7 @@ void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI( if (!HasFP) { uint64_t ScalarLocalVarSize = MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - - RVFI->getRVPushStackSize() - RVFI->getVarArgsSaveSize() + - RVFI->getRVVPadding(); + RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); FixedSize -= ScalarLocalVarSize; } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index c106b7b675465..84a8fbd117a22 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -78,6 +78,9 @@ class RISCVFrameLowering : public TargetFrameLowering { return StackId != TargetStackID::ScalableVector; } + void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + StackOffset Offset, bool EmitCFI, unsigned CFIIndex) const; + protected: const RISCVSubtarget &STI; @@ -85,9 +88,6 @@ class RISCVFrameLowering : public TargetFrameLowering { private: void determineFrameLayout(MachineFunction &MF) const; - void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - int64_t Amount, 
MachineInstr::MIFlag Flag) const; void emitCalleeSavedRVVPrologCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6eae756b25fb5..329b42d621cee 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -498,7 +498,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, - ISD::FLOG10}, + ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP}, MVT::f16, Promote); // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have @@ -506,7 +506,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, - ISD::STRICT_FTRUNC}, + ISD::STRICT_FTRUNC, ISD::STRICT_FLDEXP}, MVT::f16, Promote); // We need to custom promote this. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index c3aa367486627..005cba5d35610 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -78,6 +78,11 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; + bool shouldBreakCriticalEdgeToSink(MachineInstr &MI) const override { + return MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && + MI.getOperand(1).getReg() == RISCV::X0; + } + void copyPhysRegVector(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index b01af468d9ea2..2924083ece344 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -285,7 +285,8 @@ def : Pat<(riscv_fclass FPR64:$rs1), (FCLASS_D $rs1)>; def : PatFprFpr; def : PatFprFpr; -def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; +def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), + (FSGNJN_D FPR64:$rs1, FPR64:$rs2)>; def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2, FRM_RNE))>; def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, @@ -323,7 +324,7 @@ def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>; def : PatFprFpr; def : PatFprFpr; def : Pat<(fcopysign FPR64INX:$rs1, (fneg FPR64INX:$rs2)), - (FSGNJN_D_INX $rs1, $rs2)>; + (FSGNJN_D_INX FPR64INX:$rs1, FPR64INX:$rs2)>; def : Pat<(fcopysign FPR64INX:$rs1, FPR32INX:$rs2), (FSGNJ_D_INX $rs1, (f64 (FCVT_D_S_INX $rs2, FRM_RNE)))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64INX:$rs2), @@ -361,7 +362,7 @@ def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>; def : PatFprFpr; def : PatFprFpr; def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)), - 
(FSGNJN_D_IN32X $rs1, $rs2)>; + (FSGNJN_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2)>; def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, FRM_RNE))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 2c27e3950f07f..6c41c53bb301f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -570,7 +570,8 @@ defm : PatFprFpr_m; } let Predicates = [HasStdExtF] in { -def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>; +def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), + (FSGNJN_S FPR32:$rs1, FPR32:$rs2)>; // fmadd: rs1 * rs2 + rs3 def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3), @@ -594,7 +595,8 @@ def : Pat<(fneg (any_fma_nsz FPR32:$rs1, FPR32:$rs2, FPR32:$rs3)), } // Predicates = [HasStdExtF] let Predicates = [HasStdExtZfinx] in { -def : Pat<(fcopysign FPR32INX:$rs1, (fneg FPR32INX:$rs2)), (FSGNJN_S_INX $rs1, $rs2)>; +def : Pat<(fcopysign FPR32INX:$rs1, (fneg FPR32INX:$rs2)), + (FSGNJN_S_INX FPR32INX:$rs1, FPR32INX:$rs2)>; // fmadd: rs1 * rs2 + rs3 def : Pat<(any_fma FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index e2e99cc3f2b72..625011c3b9f7c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -291,7 +291,8 @@ def : Pat<(riscv_fclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>; def : PatFprFpr; def : PatFprFpr; -def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), (FSGNJN_H $rs1, $rs2)>; +def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), + (FSGNJN_H FPR16:$rs1, FPR16:$rs2)>; def : Pat<(f16 (fcopysign FPR16:$rs1, FPR32:$rs2)), (FSGNJ_H $rs1, (f16 (FCVT_H_S $rs2, FRM_DYN)))>; @@ -334,7 +335,8 @@ def : Pat<(riscv_fclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>; def : 
PatFprFpr; def : PatFprFpr; -def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), (FSGNJN_H_INX $rs1, $rs2)>; +def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), + (FSGNJN_H_INX FPR16INX:$rs1, FPR16INX:$rs2)>; def : Pat<(fcopysign FPR16INX:$rs1, FPR32INX:$rs2), (FSGNJ_H_INX $rs1, (FCVT_H_S_INX $rs2, FRM_DYN))>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index ff250b2c9df81..cfcc3119257f6 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -184,6 +184,23 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, const RISCVSubtarget &ST = MF.getSubtarget(); const RISCVInstrInfo *TII = ST.getInstrInfo(); + // Optimize compile time offset case + if (Offset.getScalable()) { + if (auto VLEN = ST.getRealVLen()) { + // 1. Multiply the number of v-slots by the (constant) length of register + const int64_t VLENB = *VLEN / 8; + assert(Offset.getScalable() % (RISCV::RVVBitsPerBlock / 8) == 0 && + "Reserve the stack by the multiple of one vector size."); + const int64_t NumOfVReg = Offset.getScalable() / 8; + const int64_t FixedOffset = NumOfVReg * VLENB; + if (!isInt<32>(FixedOffset)) { + report_fatal_error( + "Frame size outside of the signed 32-bit range not supported"); + } + Offset = StackOffset::getFixed(FixedOffset + Offset.getFixed()); + } + } + bool KillSrcReg = false; if (Offset.getScalable()) { @@ -456,7 +473,6 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const RISCVSubtarget &ST = MF.getSubtarget(); DebugLoc DL = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -467,19 +483,6 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!IsRVVSpill) Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); - if 
(Offset.getScalable() && - ST.getRealMinVLen() == ST.getRealMaxVLen()) { - // For an exact VLEN value, scalable offsets become constant and thus - // can be converted entirely into fixed offsets. - int64_t FixedValue = Offset.getFixed(); - int64_t ScalableValue = Offset.getScalable(); - assert(ScalableValue % 8 == 0 && - "Scalable offset is not a multiple of a single vector size."); - int64_t NumOfVReg = ScalableValue / 8; - int64_t VLENB = ST.getRealMinVLen() / 8; - Offset = StackOffset::getFixed(FixedValue + NumOfVReg * VLENB); - } - if (!isInt<32>(Offset.getFixed())) { report_fatal_error( "Frame offsets outside of the signed 32-bit range not supported"); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 03397e1e0d89e..426d368204904 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -186,11 +186,6 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const { bool RISCVSubtarget::enableSubRegLiveness() const { return true; } -void RISCVSubtarget::getPostRAMutations( - std::vector> &Mutations) const { - Mutations.push_back(createMacroFusionDAGMutation(getMacroFusions())); -} - /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). 
bool RISCVSubtarget::useAA() const { return UseAA; } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index f2c0a3d85c998..043838e13b964 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -301,9 +301,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool enableSubRegLiveness() const override; - void getPostRAMutations(std::vector> - &Mutations) const override; - bool useAA() const override; unsigned getCacheLineSize() const override { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d9729e06f7aea..8f0ef69258b16 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -925,6 +925,14 @@ static const CostTblEntry VectorIntrinsicCostTable[]{ {Intrinsic::ctpop, MVT::i16, 19}, {Intrinsic::ctpop, MVT::i32, 20}, {Intrinsic::ctpop, MVT::i64, 21}, + {Intrinsic::ctlz, MVT::i8, 19}, + {Intrinsic::ctlz, MVT::i16, 28}, + {Intrinsic::ctlz, MVT::i32, 31}, + {Intrinsic::ctlz, MVT::i64, 35}, + {Intrinsic::cttz, MVT::i8, 16}, + {Intrinsic::cttz, MVT::i16, 23}, + {Intrinsic::cttz, MVT::i32, 24}, + {Intrinsic::cttz, MVT::i64, 25}, {Intrinsic::vp_ctpop, MVT::i8, 12}, {Intrinsic::vp_ctpop, MVT::i16, 19}, {Intrinsic::vp_ctpop, MVT::i32, 20}, @@ -1013,6 +1021,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return LT.first; break; } + case Intrinsic::cttz: + case Intrinsic::ctlz: case Intrinsic::ctpop: { auto LT = getTypeLegalizationCost(RetTy); if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) @@ -1024,7 +1034,9 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (ST->hasVInstructions() && LT.second.isVector()) { // vrsub.vi v10, v8, 0 // vmax.vv v8, v8, v10 - return LT.first * 2; + return LT.first * + getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV}, + LT.second, 
CostKind); } break; } @@ -1111,39 +1123,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind); break; } - // vp int cast ops. - case Intrinsic::vp_trunc: - case Intrinsic::vp_zext: - case Intrinsic::vp_sext: - // vp float cast ops. - case Intrinsic::vp_fptoui: - case Intrinsic::vp_fptosi: - case Intrinsic::vp_uitofp: - case Intrinsic::vp_sitofp: - case Intrinsic::vp_fptrunc: - case Intrinsic::vp_fpext: { - std::optional FOp = - VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID()); - assert(FOp.has_value() && !ICA.getArgTypes().empty()); - return getCastInstrCost(*FOp, RetTy, ICA.getArgTypes()[0], - TTI::CastContextHint::None, CostKind); - break; - } - - // vp compare - case Intrinsic::vp_icmp: - case Intrinsic::vp_fcmp: { - Intrinsic::ID IID = ICA.getID(); - std::optional FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID); - // We can only handle vp_cmp intrinsics with underlying instructions. - if (!ICA.getInst()) - break; - - assert(FOp); - auto *UI = cast(ICA.getInst()); - return getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0], ICA.getReturnType(), - UI->getPredicate(), CostKind); - } case Intrinsic::vp_select: { Intrinsic::ID IID = ICA.getID(); std::optional FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID); @@ -1155,6 +1134,16 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(), ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE, CostKind); + case Intrinsic::experimental_vp_splat: { + auto LT = getTypeLegalizationCost(RetTy); + // TODO: Lower i1 experimental_vp_splat + if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1) + return InstructionCost::getInvalid(); + return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint() + ? 
RISCV::VFMV_V_F + : RISCV::VMV_V_X, + LT.second, CostKind); + } case Intrinsic::vp_reduce_add: case Intrinsic::vp_reduce_fadd: case Intrinsic::vp_reduce_mul: @@ -2328,20 +2317,6 @@ bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) { return true; } -bool RISCVTTIImpl::areInlineCompatible(const Function *Caller, - const Function *Callee) const { - const TargetMachine &TM = getTLI()->getTargetMachine(); - - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - // Inline a callee if its target-features are a subset of the callers - // target-features. - return (CallerBits & CalleeBits) == CalleeBits; -} - /// See if \p I should be considered for address type promotion. We check if \p /// I is a sext with right type and used in memory accesses. If it used in a /// "complex" getelementptr, we allow it to be promoted without finding other diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 498f48353dc0c..6fd36e90a02dd 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -60,9 +60,6 @@ class RISCVTTIImpl : public BasicTTIImplBase { : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - bool areInlineCompatible(const Function *Caller, - const Function *Callee) const; - /// Return the cost of materializing an immediate for a value operand of /// a store instruction. 
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 90d7bd934af40..403d238aa5b52 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -671,7 +671,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, } // Lower the displacement to a TargetConstant. - Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT); + Disp = CurDAG->getSignedTargetConstant(AM.Disp, SDLoc(Base), VT); } void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, @@ -2024,8 +2024,9 @@ SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { CurDAG->getConstant(IPM.XORValue, DL, MVT::i32)); if (IPM.AddValue) - Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, - CurDAG->getConstant(IPM.AddValue, DL, MVT::i32)); + Result = + CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, + CurDAG->getSignedConstant(IPM.AddValue, DL, MVT::i32)); EVT VT = Node->getValueType(0); if (VT == MVT::i32 && IPM.Bit == 31) { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 78d91299a357d..8f505b7e198cf 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1444,15 +1444,15 @@ void SystemZTargetLowering::LowerAsmOperandForConstraint( case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), - Op.getValueType())); + Ops.push_back(DAG.getSignedTargetConstant( + C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), - 
Op.getValueType())); + Ops.push_back(DAG.getSignedTargetConstant( + C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff @@ -2578,7 +2578,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) - C.Op1 = DAG.getConstant(Value, DL, MVT::i32); + C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable @@ -3410,7 +3410,7 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, } if (Invert) { SDValue Mask = - DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); + DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } if (Chain && Chain.getNode() != Cmp.getNode()) { @@ -3571,7 +3571,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, - DAG.getConstant(Offset, DL, PtrVT)); + DAG.getSignedConstant(Offset, DL, PtrVT)); return Result; } @@ -3834,7 +3834,7 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, const auto *TFL = Subtarget.getFrameLowering(); int Offset = TFL->getReturnAddressOffset(MF); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, - DAG.getConstant(Offset, DL, PtrVT)); + DAG.getSignedConstant(Offset, DL, PtrVT)); return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); } @@ -4584,7 +4584,7 @@ static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, // Get the address of the containing word. AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, DL, PtrVT)); + DAG.getSignedConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. 
@@ -4623,7 +4623,8 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) if (auto *Const = dyn_cast(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; - Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); + Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL, + Src2.getValueType()); } SDValue AlignedAddr, BitShift, NegBitShift; diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td index 0221e2c53f2f4..64345ca3a1394 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -220,8 +220,8 @@ def NEGLF32 : SDNodeXFormgetTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 8-bit unsigned quantity. @@ -244,14 +244,14 @@ def UIMM12 : SDNodeXFormgetTargetConstant(int16_t(N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int16_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Negate and then truncate an immediate to a 16-bit signed quantity. def NEGSIMM16 : SDNodeXFormgetTargetConstant(int16_t(-N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int16_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 16-bit unsigned quantity. @@ -268,8 +268,8 @@ def SIMM32 : SDNodeXFormgetTargetConstant(int32_t(-N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int32_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 32-bit unsigned quantity. 
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 4eb58e27f7ad7..c182c9890509f 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -53,7 +53,7 @@ static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op, int64_t Adj = getMemMemLenAdj(Op); SDValue LenAdj = DAG.getNode(ISD::ADD, DL, MVT::i64, DAG.getZExtOrTrunc(Size, DL, MVT::i64), - DAG.getConstant(0 - Adj, DL, MVT::i64)); + DAG.getSignedConstant(0 - Adj, DL, MVT::i64)); return createMemMemNode(DAG, DL, Op, Chain, Dst, Src, LenAdj, Byte); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index f96e3232b93f4..3d678e5384166 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -104,24 +104,6 @@ TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle( return TTI::ReductionShuffle::SplitHalf; } -bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller, - const Function *Callee) const { - // Allow inlining only when the Callee has a subset of the Caller's - // features. In principle, we should be able to inline regardless of any - // features because WebAssembly supports features at module granularity, not - // function granularity, but without this restriction it would be possible for - // a module to "forget" about features if all the functions that used them - // were inlined. 
- const TargetMachine &TM = getTLI()->getTargetMachine(); - - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - return (CallerBits & CalleeBits) == CalleeBits; -} - void WebAssemblyTTIImpl::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 2ce6cbf3ba026..9691120b2e531 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -72,9 +72,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const; - bool areInlineCompatible(const Function *Caller, - const Function *Callee) const; - bool supportsTailCalls() const; bool isProfitableToSinkOperands(Instruction *I, diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 8be8f0b6d735c..01b6c84419fc8 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -634,6 +634,7 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"reloc_riprel_4byte_relax_rex2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"reloc_riprel_6byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"reloc_signed_4byte", 0, 32, 0}, {"reloc_signed_4byte_relax", 0, 32, 0}, {"reloc_global_offset_table", 0, 32, 0}, @@ -683,6 +684,7 @@ static unsigned getFixupKindSize(unsigned Kind) { case X86::reloc_riprel_4byte_relax_rex2: case 
X86::reloc_riprel_4byte_movq_load: case X86::reloc_riprel_4byte_movq_load_rex2: + case X86::reloc_riprel_6byte_relax: case X86::reloc_signed_4byte: case X86::reloc_signed_4byte_relax: case X86::reloc_global_offset_table: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 349cd011eff30..29a1af97d24fa 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -77,6 +77,7 @@ static X86_64RelType getType64(MCFixupKind Kind, case X86::reloc_riprel_4byte_relax_rex2: case X86::reloc_riprel_4byte_movq_load: case X86::reloc_riprel_4byte_movq_load_rex2: + case X86::reloc_riprel_6byte_relax: return RT64_32; case X86::reloc_branch_4byte_pcrel: Modifier = MCSymbolRefExpr::VK_PLT; @@ -202,6 +203,8 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc, if ((unsigned)Kind == X86::reloc_riprel_4byte_movq_load_rex2 || (unsigned)Kind == X86::reloc_riprel_4byte_relax_rex2) return ELF::R_X86_64_CODE_4_GOTTPOFF; + else if ((unsigned)Kind == X86::reloc_riprel_6byte_relax) + return ELF::R_X86_64_CODE_6_GOTTPOFF; return ELF::R_X86_64_GOTTPOFF; case MCSymbolRefExpr::VK_TLSLD: checkIs32(Ctx, Loc, Type); @@ -227,6 +230,8 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc, case X86::reloc_riprel_4byte_relax_rex2: case X86::reloc_riprel_4byte_movq_load_rex2: return ELF::R_X86_64_CODE_4_GOTPCRELX; + case X86::reloc_riprel_6byte_relax: + return ELF::R_X86_64_CODE_6_GOTTPOFF; } llvm_unreachable("unexpected relocation type!"); case MCSymbolRefExpr::VK_GOTPCREL_NORELAX: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h index 29bb7eebae3f2..52592a5a13b97 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h @@ -24,6 +24,8 @@ enum Fixups { // instruction with rex prefix reloc_riprel_4byte_relax_rex2, // 
32-bit rip-relative in relaxable // instruction with rex2 prefix + reloc_riprel_6byte_relax, // 32-bit rip-relative in relaxable + // instruction with APX NDD reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4 // this will be sign extended at // runtime. diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index f12275ffaba8b..052d732e4d019 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -579,7 +579,9 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc, // this needs to be a GOTPC32 relocation. if (startsWithGlobalOffsetTable(Expr) != GOT_None) FixupKind = MCFixupKind(X86::reloc_global_offset_table); - } + } else if (FixupKind == MCFixupKind(X86::reloc_riprel_6byte_relax)) + ImmOffset -= 6; + if (FixupKind == FK_PCRel_2) ImmOffset -= 2; if (FixupKind == FK_PCRel_1) @@ -670,6 +672,12 @@ void X86MCCodeEmitter::emitMemModRMByte( return Kind == REX2 ? X86::reloc_riprel_4byte_relax_rex2 : Kind == REX ? 
X86::reloc_riprel_4byte_relax_rex : X86::reloc_riprel_4byte_relax; + case X86::ADD64rm_NF: + case X86::ADD64rm_ND: + case X86::ADD64mr_ND: + case X86::ADD64mr_NF_ND: + case X86::ADD64rm_NF_ND: + return X86::reloc_riprel_6byte_relax; } }(); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 41ce5c9fcb82a..413650e90de65 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -69,7 +69,8 @@ static bool isFixupKindRIPRel(unsigned Kind) { Kind == X86::reloc_riprel_4byte_movq_load_rex2 || Kind == X86::reloc_riprel_4byte_relax || Kind == X86::reloc_riprel_4byte_relax_rex || - Kind == X86::reloc_riprel_4byte_relax_rex2; + Kind == X86::reloc_riprel_4byte_relax_rex2 || + Kind == X86::reloc_riprel_6byte_relax; } static unsigned getFixupKindLog2Size(unsigned Kind) { @@ -91,6 +92,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { case X86::reloc_signed_4byte: case X86::reloc_signed_4byte_relax: case X86::reloc_branch_4byte_pcrel: + case X86::reloc_riprel_6byte_relax: case FK_Data_4: return 2; case FK_Data_8: return 3; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 7740500fb4183..48d4707bbe1eb 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -70,6 +70,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, case X86::reloc_riprel_4byte_relax: case X86::reloc_riprel_4byte_relax_rex: case X86::reloc_riprel_4byte_relax_rex2: + case X86::reloc_riprel_6byte_relax: case X86::reloc_branch_4byte_pcrel: return COFF::IMAGE_REL_AMD64_REL32; case FK_Data_4: diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index db39340275778..e4533570f7508 100644 --- 
a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1711,6 +1711,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32); setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); } + setOperationAction(ISD::SETCC, MVT::v8f16, Custom); + setOperationAction(ISD::SETCC, MVT::v16f16, Custom); } // This block controls legalization of the mask vector sizes that are @@ -2046,6 +2048,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Custom); for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32); + setOperationAction(ISD::SETCC, MVT::v32f16, Custom); for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::MLOAD, VT, Legal); @@ -2401,6 +2404,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(Opc, MVT::v8bf16, MVT::v8f32); setOperationPromotedToType(Opc, MVT::v16bf16, MVT::v16f32); } + setOperationAction(ISD::SETCC, MVT::v8bf16, Custom); + setOperationAction(ISD::SETCC, MVT::v16bf16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom); addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); } @@ -2411,6 +2416,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setF16Action(MVT::v32bf16, Expand); for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) setOperationPromotedToType(Opc, MVT::v32bf16, MVT::v32f32); + setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom); @@ -14300,9 +14306,17 @@ static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef Mask, // sub-512-bit shuffles are padded to 512-bits for the shuffle and 
then // the active subvector is extracted. static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, + ArrayRef OriginalMask, SDValue V1, + SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + // Commute binary inputs so V2 is a load to simplify VPERMI2/T2 folds. + SmallVector Mask(OriginalMask); + if (!V2.isUndef() && isShuffleFoldableLoad(V1) && + !isShuffleFoldableLoad(V2)) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } + MVT MaskVT = VT.changeTypeToInteger(); SDValue MaskNode; MVT ShuffleVT = VT; @@ -19587,7 +19601,7 @@ static SDValue promoteXINT_TO_FP(SDValue Op, const SDLoc &dl, MVT VT = Op.getSimpleValueType(); MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; - SDValue Rnd = DAG.getIntPtrConstant(0, dl); + SDValue Rnd = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true); if (IsStrict) return DAG.getNode( ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, @@ -20258,7 +20272,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (DstVT == MVT::f80) return Add; return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other}, - {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)}); + {Add.getValue(1), Add, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); } unsigned Opc = ISD::FADD; // Windows needs the precision control changed to 80bits around this add. @@ -23388,14 +23403,12 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, return SSECC; } -/// Break a VSETCC 256-bit integer VSETCC into two new 128 ones and then +/// Break a VSETCC 256/512-bit vector into two new 128/256 ones and then /// concatenate the result back. 
-static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS, - ISD::CondCode Cond, SelectionDAG &DAG, - const SDLoc &dl) { - assert(VT.isInteger() && VT == LHS.getValueType() && - VT == RHS.getValueType() && "Unsupported VTs!"); - +static SDValue splitVSETCC(EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, + SelectionDAG &DAG, const SDLoc &dl) { + assert(VT.isInteger() && LHS.getValueType() == RHS.getValueType() && + "Unsupported VTs!"); SDValue CC = DAG.getCondCode(Cond); // Extract the LHS Lo/Hi vectors @@ -23536,18 +23549,43 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SDValue CC = Op.getOperand(IsStrict ? 3 : 2); MVT VT = Op->getSimpleValueType(0); ISD::CondCode Cond = cast(CC)->get(); - bool isFP = Op1.getSimpleValueType().isFloatingPoint(); + MVT OpVT = Op0.getSimpleValueType(); SDLoc dl(Op); - if (isFP) { - MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); + if (OpVT.isFloatingPoint()) { + MVT EltVT = OpVT.getVectorElementType(); assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64); - if (isSoftF16(EltVT, Subtarget)) - return SDValue(); - bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); + if (isSoftF16(EltVT, Subtarget)) { + // Break 256-bit FP vector compare into smaller ones. + if (OpVT.is256BitVector() && !Subtarget.useAVX512Regs()) + return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl); + + // Break 512-bit FP vector compare into smaller ones. + if (OpVT.is512BitVector()) + return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl); + + MVT NVT = OpVT.changeVectorElementType(MVT::f32); + if (IsStrict) { + Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Chain, Op0}); + Op1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Chain, Op1}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Chain, Op0, Op1, CC}); + } + MVT DVT = VT.getVectorElementType() == MVT::i16 + ? 
VT.changeVectorElementType(MVT::i32) + : VT; + SDValue Cmp = DAG.getNode(Op.getOpcode(), dl, DVT, + DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op0), + DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op1), CC); + return DVT == VT ? Cmp : DAG.getNode(ISD::TRUNCATE, dl, VT, Cmp); + } + + bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; // If we have a strict compare with a vXi1 result and the input is 128/256 // bits we can't use a masked compare unless we have VLX. If we use a wider @@ -23758,12 +23796,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // Break 256-bit integer vector compare into smaller ones. if (VT.is256BitVector() && !Subtarget.hasInt256()) - return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); + return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl); // Break 512-bit integer vector compare into smaller ones. // TODO: Try harder to use VPCMPx + VPMOV2x? if (VT.is512BitVector()) - return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); + return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl); // If we have a limit constant, try to form PCMPGT (signed cmp) to avoid // not-of-PCMPEQ: @@ -42244,6 +42282,17 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, DAG.getIntPtrConstant(0, DL)); } } + SmallVector Ops; + SmallVector Mask; + if (isShuffleFoldableLoad(N.getOperand(0)) && + !isShuffleFoldableLoad(N.getOperand(2)) && + getTargetShuffleMask(N, /*AllowSentinelZero=*/false, Ops, Mask)) { + ShuffleVectorSDNode::commuteMask(Mask); + SDValue NewMask = getConstVector( + Mask, N.getOperand(1).getSimpleValueType(), DAG, DL, /*IsMask=*/true); + return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask, + N.getOperand(0)); + } return SDValue(); } default: @@ -59237,6 +59286,15 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { return Ld->getBasePtr() == St->getBasePtr(); }; + auto IsFoldableZext = [](SDValue Op) { + if (!Op.hasOneUse()) + return false; + SDNode *User = *Op->use_begin(); + EVT VT = 
User->getValueType(0); + return (User->getOpcode() == ISD::ZERO_EXTEND && + (VT == MVT::i32 || VT == MVT::i64)); + }; + bool Commute = false; switch (Op.getOpcode()) { default: return false; @@ -59253,8 +59311,15 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { return false; break; } - case ISD::ADD: case ISD::MUL: + // When ZU is enabled, we prefer to not promote for MUL by a constant + // when there is an opportunity to fold a zext with imulzu. + if (Subtarget.hasZU() && IsFoldableZext(Op) && + (isa(Op.getOperand(0)) || + isa(Op.getOperand(1)))) + return false; + [[fallthrough]]; + case ISD::ADD: case ISD::AND: case ISD::OR: case ISD::XOR: diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index ea0b66c2f5516..7d4c5c0e10e49 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -2184,6 +2184,18 @@ multiclass EFLAGSDefiningPats { defm : EFLAGSDefiningPats<"", NoNDD>; defm : EFLAGSDefiningPats<"_ND", HasNDD>; +let Predicates = [HasZU] in { + // zext (mul reg/mem, imm) -> imulzu + def : Pat<(i32 (zext (i16 (mul GR16:$src1, imm:$src2)))), + (SUBREG_TO_REG (i32 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>; + def : Pat<(i32 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))), + (SUBREG_TO_REG (i32 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>; + def : Pat<(i64 (zext (i16 (mul GR16:$src1, imm:$src2)))), + (SUBREG_TO_REG (i64 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>; + def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))), + (SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>; +} + // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>; diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index eb2e93a94b197..5bdcf51be9dd8 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ 
b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -45,6 +45,7 @@ def NoEGPR : Predicate<"!Subtarget->hasEGPR()">; // entries, so that the NDD variant can be selected first to benefit RA. def HasNDD : Predicate<"Subtarget->hasNDD()">; def NoNDD : Predicate<"!Subtarget->hasNDD()">; +def HasZU : Predicate<"Subtarget->hasZU()">; def HasCF : Predicate<"Subtarget->hasCF()">; def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">; diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td index 0c80f1eaadadb..b31ed81160a29 100644 --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -210,10 +210,7 @@ def AlderLakePfmCounters : ProcPfmCounters { let IssueCounters = [ PfmIssueCounter<"ADLPPort00", "uops_dispatched:port_0">, PfmIssueCounter<"ADLPPort01", "uops_dispatched:port_1">, - // The perfmon documentation and thus libpfm seems to incorrectly label - // this performance counter, as ports 2,3, and 11 are actually grouped - // according to most documentation. See #113941 for additional details. - PfmIssueCounter<"ADLPPort02_03_11", "uops_dispatched:port_2_3_10">, + PfmIssueCounter<"ADLPPort02_03_10", "uops_dispatched:port_2_3_10">, PfmIssueCounter<"ADLPPort04_09", "uops_dispatched:port_4_9">, PfmIssueCounter<"ADLPPort05_11", "uops_dispatched:port_5_11">, PfmIssueCounter<"ADLPPort06", "uops_dispatched:port_6">, @@ -229,10 +226,7 @@ def SapphireRapidsPfmCounters : ProcPfmCounters { let IssueCounters = [ PfmIssueCounter<"SPRPort00", "uops_dispatched:port_0">, PfmIssueCounter<"SPRPort01", "uops_dispatched:port_1">, - // The perfmon documentation and thus libpfm seems to incorrectly label - // this performance counter, as ports 2,3, and 11 are actually grouped - // according to most documentation. See #113941 for additional details. 
- PfmIssueCounter<"SPRPort02_03_11", "uops_dispatched:port_2_3_10">, + PfmIssueCounter<"SPRPort02_03_10", "uops_dispatched:port_2_3_10">, PfmIssueCounter<"SPRPort04_09", "uops_dispatched:port_4_9">, PfmIssueCounter<"SPRPort05_11", "uops_dispatched:port_5_11">, PfmIssueCounter<"SPRPort06", "uops_dispatched:port_6">, diff --git a/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/llvm/lib/Target/X86/X86SchedAlderlakeP.td index f8c6b32a853be..564369804711a 100644 --- a/llvm/lib/Target/X86/X86SchedAlderlakeP.td +++ b/llvm/lib/Target/X86/X86SchedAlderlakeP.td @@ -56,16 +56,15 @@ def ADLPPort00_05 : ProcResGroup<[ADLPPort00, ADLPPort05]>; def ADLPPort00_05_06 : ProcResGroup<[ADLPPort00, ADLPPort05, ADLPPort06]>; def ADLPPort00_06 : ProcResGroup<[ADLPPort00, ADLPPort06]>; def ADLPPort01_05 : ProcResGroup<[ADLPPort01, ADLPPort05]>; -def ADLPPort01_05_10 : ProcResGroup<[ADLPPort01, ADLPPort05, ADLPPort10]>; +def ADLPPort01_05_11 : ProcResGroup<[ADLPPort01, ADLPPort05, ADLPPort11]>; def ADLPPort02_03 : ProcResGroup<[ADLPPort02, ADLPPort03]>; def ADLPPort02_03_07 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort07]>; -def ADLPPort02_03_11 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort11]>; -def ADLPPort05_11 : ProcResGroup<[ADLPPort05, ADLPPort11]>; +def ADLPPort02_03_10 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort10]>; def ADLPPort07_08 : ProcResGroup<[ADLPPort07, ADLPPort08]>; // EU has 112 reservation stations. -def ADLPPort00_01_05_06_10 : ProcResGroup<[ADLPPort00, ADLPPort01, ADLPPort05, - ADLPPort06, ADLPPort10]> { +def ADLPPort00_01_05_06_11 : ProcResGroup<[ADLPPort00, ADLPPort01, ADLPPort05, + ADLPPort06, ADLPPort11]> { let BufferSize = 112; } @@ -75,8 +74,8 @@ def ADLPPort04_09 : ProcResGroup<[ADLPPort04, ADLPPort09]> { } // MEM has 72 reservation stations. 
-def ADLPPort02_03_07_08_11 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort07, - ADLPPort08, ADLPPort11]> { +def ADLPPort02_03_07_08_10 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort07, + ADLPPort08, ADLPPort10]> { let BufferSize = 72; } @@ -114,7 +113,7 @@ multiclass ADLPWriteResPair { + def : WriteRes { let Latency = !add(Lat, LoadLat); let ReleaseAtCycles = !listconcat([1], Res); let NumMicroOps = !add(UOps, LoadUOps); @@ -127,49 +126,49 @@ multiclass ADLPWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes; +def : WriteRes { let Latency = 11; } defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; def : WriteRes; defm : X86WriteRes; defm : ADLPWriteResPair; def : WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; def : WriteRes { let Latency = 11; } -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; @@ -177,17 +176,17 @@ defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : 
X86WriteResPairUnsupported; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; @@ -199,12 +198,12 @@ defm : ADLPWriteResPair defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -212,7 +211,7 @@ defm : X86WriteRes { let Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; @@ -249,13 +248,13 @@ defm : ADLPWriteResPair; defm : X86WriteRes; defm : X86WriteRes; -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } defm : ADLPWriteResPair; @@ -268,8 +267,8 @@ defm : X86WriteResPairUnsupported; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -331,15 +330,15 @@ defm : X86WriteResPairUnsupported; def : WriteRes { let Latency = 2; } -defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; defm : X86WriteRes; -defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; @@ -357,10 +356,10 @@ defm : X86WriteRes; def : WriteRes { let Latency = 3; } 
-defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; defm : ADLPWriteResPair; -def : WriteRes { +def : WriteRes { let Latency = 5; } def : WriteRes { @@ -368,17 +367,17 @@ def : WriteRes { } defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; def : WriteRes { let Latency = AlderlakePModel.MaxLatency; } -def : WriteRes; +def : WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; @@ -393,16 +392,16 @@ defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes { let Latency = 3; } @@ -447,20 +446,20 @@ defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } defm : ADLPWriteResPair; @@ -474,8 +473,8 @@ def : WriteRes { let Latency = 4; } defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -498,9 +497,9 @@ def : WriteRes; defm : X86WriteResUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : 
X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; @@ -509,7 +508,7 @@ defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; // Infered SchedWriteRes and InstRW definition. @@ -521,14 +520,14 @@ def ADLPWriteResGroup0 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort02_03, ADLP def : InstRW<[ADLPWriteResGroup0], (instregex "^AA(D|N)D64mr$", "^A(X?)OR64mr$")>; -def ADLPWriteResGroup1 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup1 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 12; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup1, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(16|32|64)mr$")>; -def ADLPWriteResGroup2 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup2 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -538,20 +537,20 @@ def : InstRW<[ADLPWriteResGroup2], (instregex "^JMP(16|32|64)m((_NT)?)$", def : InstRW<[ADLPWriteResGroup2, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(8|16|32|64)rm$", "^AD(C|O)X(32|64)rm$")>; -def ADLPWriteResGroup3 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup3 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[ADLPWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>; -def ADLPWriteResGroup4 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, 
ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup4 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup4, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)8mr$")>; -def ADLPWriteResGroup5 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup5 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -576,7 +575,7 @@ def : InstRW<[ADLPWriteResGroup6], (instregex "^(ADD|SUB)64ri8$", def : InstRW<[ADLPWriteResGroup6], (instrs CLC, JMP_2)>; -def ADLPWriteResGroup7 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup7 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 13; let NumMicroOps = 4; } @@ -610,7 +609,7 @@ def ADLPWriteResGroup10 : SchedWriteRes<[ADLPPort02_03, ADLPPort05]> { def : InstRW<[ADLPWriteResGroup10], (instregex "^ADD_FI(16|32)m$", "^SUB(R?)_FI(16|32)m$")>; -def ADLPWriteResGroup11 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup11 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let Latency = 2; } def : InstRW<[ADLPWriteResGroup11], (instregex "^AND(8|16|32|64)r(r|i8)$", @@ -628,7 +627,7 @@ def : InstRW<[ADLPWriteResGroup11], (instregex "^AND(8|16|32|64)r(r|i8)$", "^TEST(8|16|32|64)rr$")>; def : InstRW<[ADLPWriteResGroup11], (instrs XOR8rr_NOREX)>; -def ADLPWriteResGroup12 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup12 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } @@ -638,18 +637,18 @@ def : InstRW<[ADLPWriteResGroup12, ReadAfterLd], (instregex "^(X?)OR64rm$")>; def : InstRW<[ADLPWriteResGroup12, ReadAfterLd], (instrs 
AND64rm)>; def : InstRW<[ADLPWriteResGroup12, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^TEST(8|16|32|64)mr$")>; -def ADLPWriteResGroup13 : SchedWriteRes<[ADLPPort01_05_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup13 : SchedWriteRes<[ADLPPort01_05_11, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup13, ReadAfterLd], (instregex "^ANDN(32|64)rm$")>; -def ADLPWriteResGroup14 : SchedWriteRes<[ADLPPort01_05_10]> { +def ADLPWriteResGroup14 : SchedWriteRes<[ADLPPort01_05_11]> { let Latency = 2; } def : InstRW<[ADLPWriteResGroup14], (instregex "^ANDN(32|64)rr$")>; -def ADLPWriteResGroup15 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup15 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 2, 1, 1]; let Latency = 10; let NumMicroOps = 9; @@ -662,14 +661,14 @@ def ADLPWriteResGroup16 : SchedWriteRes<[ADLPPort01]> { def : InstRW<[ADLPWriteResGroup16], (instregex "^BT((C|R|S)?)64rr$", "^P(DEP|EXT)(32|64)rr$")>; -def ADLPWriteResGroup17 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup17 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 2, 1, 1, 1, 1]; let Latency = 17; let NumMicroOps = 10; } def : InstRW<[ADLPWriteResGroup17], (instregex "^BT(C|R|S)64mr$")>; -def ADLPWriteResGroup18 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup18 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 7; let NumMicroOps = 5; } @@ -701,25 +700,25 @@ def ADLPWriteResGroup22 : SchedWriteRes<[ADLPPort00_06]>; def : InstRW<[ADLPWriteResGroup22], 
(instregex "^C(DQ|QO)$", "^(CL|ST)AC$")>; -def ADLPWriteResGroup23 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup23 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let Latency = 3; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup23], (instrs CLD)>; -def ADLPWriteResGroup24 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup24 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let Latency = 3; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup24], (instrs CLDEMOTE)>; -def ADLPWriteResGroup25 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup25 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort04_09, ADLPPort07_08]> { let Latency = 2; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup25], (instrs CLFLUSH)>; -def ADLPWriteResGroup26 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup26 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let Latency = 2; let NumMicroOps = 3; } @@ -739,35 +738,35 @@ def ADLPWriteResGroup28 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort05]> } def : InstRW<[ADLPWriteResGroup28], (instrs CLTS)>; -def ADLPWriteResGroup29 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup29 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let Latency = 5; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup29], (instregex "^MOV16o(16|32|64)a$")>; def : InstRW<[ADLPWriteResGroup29], (instrs CLWB)>; -def ADLPWriteResGroup30 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup30 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 2]; let Latency = 6; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup30], (instregex "^CMPS(B|L|Q|W)$")>; -def 
ADLPWriteResGroup31 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup31 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [2, 7, 6, 2, 1, 1, 2, 1]; let Latency = 32; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup31], (instrs CMPXCHG16B)>; -def ADLPWriteResGroup32 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup32 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 7, 2, 1, 1, 1]; let Latency = 25; let NumMicroOps = 16; } def : InstRW<[ADLPWriteResGroup32], (instrs CMPXCHG8B)>; -def ADLPWriteResGroup33 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup33 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; @@ -781,13 +780,13 @@ def ADLPWriteResGroup34 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_0 } def : InstRW<[ADLPWriteResGroup34], (instrs CPUID)>; -def ADLPWriteResGroup35 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort02_03_11]> { +def ADLPWriteResGroup35 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort02_03_10]> { let Latency = 26; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup35], (instregex "^(V?)CVT(T?)SD2SIrm((_Int)?)$")>; -def ADLPWriteResGroup36 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup36 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_10, ADLPPort05]> { let Latency = 12; let NumMicroOps = 3; } @@ -811,7 +810,7 @@ def ADLPWriteResGroup38 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort05]> def : InstRW<[ADLPWriteResGroup38], (instregex "^(V?)CVT(T?)SS2SI64rr_Int$")>; def : InstRW<[ADLPWriteResGroup38, ReadDefault], (instregex "^(V?)CVT(T?)SS2SI64rr$")>; -def ADLPWriteResGroup39 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup39 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let Latency = 2; let NumMicroOps = 2; } @@ -827,7 +826,7 @@ def : InstRW<[ADLPWriteResGroup40], (instrs DEC16r_alt, ST_FPrr, SYSCALL)>; -def ADLPWriteResGroup41 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup41 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 7; } def : InstRW<[ADLPWriteResGroup41], (instrs DEC32r_alt)>; @@ -850,7 +849,7 @@ def ADLPWriteResGroup44 : SchedWriteRes<[ADLPPort00]> { def : InstRW<[ADLPWriteResGroup44], (instregex "^DIVR_F(P?)rST0$")>; def : InstRW<[ADLPWriteResGroup44], (instrs DIVR_FST0r)>; -def ADLPWriteResGroup45 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11]> { +def ADLPWriteResGroup45 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10]> { let Latency = 20; let NumMicroOps = 2; } @@ -874,7 +873,7 @@ def ADLPWriteResGroup48 : SchedWriteRes<[ADLPPort00]> { def : InstRW<[ADLPWriteResGroup48], (instregex "^DIV_F(P?)rST0$")>; def : InstRW<[ADLPWriteResGroup48], (instrs DIV_FST0r)>; -def ADLPWriteResGroup49 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup49 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [2, 21, 2, 14, 4, 9, 5]; let Latency = 126; let NumMicroOps = 57; @@ -1001,14 +1000,14 @@ def ADLPWriteResGroup67 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPo } def : InstRW<[ADLPWriteResGroup67], (instrs FXRSTOR64)>; -def ADLPWriteResGroup68 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup68 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [2, 5, 10, 10, 2, 38, 5, 38]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 110; } def : InstRW<[ADLPWriteResGroup68], (instregex "^FXSAVE((64)?)$")>; -def ADLPWriteResGroup69 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11]> { +def ADLPWriteResGroup69 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_10]> { let Latency = 12; let NumMicroOps = 2; } @@ -1023,41 +1022,41 @@ def ADLPWriteResGroup70 : SchedWriteRes<[ADLPPort00_01]> { def : InstRW<[ADLPWriteResGroup70], (instregex "^(V?)GF2P8MULBrr$")>; def : InstRW<[ADLPWriteResGroup70], (instrs VGF2P8MULBYrr)>; -def ADLPWriteResGroup71 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup71 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 5, 26, 19, 2, 7, 21]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[ADLPWriteResGroup71], (instrs IN16ri)>; -def ADLPWriteResGroup72 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup72 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 1, 4, 26, 19, 3, 7, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[ADLPWriteResGroup72], (instrs IN16rr)>; -def ADLPWriteResGroup73 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, 
ADLPPort05]> { +def ADLPWriteResGroup73 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 6, 28, 21, 2, 10, 20]; let Latency = 35; let NumMicroOps = 94; } def : InstRW<[ADLPWriteResGroup73], (instrs IN32ri)>; -def ADLPWriteResGroup74 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup74 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 9, 28, 21, 2, 11, 21]; let NumMicroOps = 99; } def : InstRW<[ADLPWriteResGroup74], (instrs IN32rr)>; -def ADLPWriteResGroup75 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup75 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 8, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[ADLPWriteResGroup75], (instrs IN8ri)>; -def ADLPWriteResGroup76 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup76 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 7, 20]; let Latency = 35; let NumMicroOps = 86; @@ -1069,7 +1068,7 @@ def ADLPWriteResGroup77 : SchedWriteRes<[ADLPPort00_06]> { } def : InstRW<[ADLPWriteResGroup77], (instrs INC16r_alt)>; -def ADLPWriteResGroup78 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup78 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 7; } def : InstRW<[ADLPWriteResGroup78], (instregex "^(V?)MOV(D|SH|SL)DUPrm$", @@ -1077,28 +1076,28 @@ def : 
InstRW<[ADLPWriteResGroup78], (instregex "^(V?)MOV(D|SH|SL)DUPrm$", def : InstRW<[ADLPWriteResGroup78], (instrs INC32r_alt, VBROADCASTSSrm)>; -def ADLPWriteResGroup79 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup79 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 6, 24, 17, 8, 1, 19, 1]; let Latency = 20; let NumMicroOps = 83; } def : InstRW<[ADLPWriteResGroup79], (instrs INSB)>; -def ADLPWriteResGroup80 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup80 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1]; let Latency = 20; let NumMicroOps = 92; } def : InstRW<[ADLPWriteResGroup80], (instrs INSL)>; -def ADLPWriteResGroup81 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup81 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1]; let Latency = 20; let NumMicroOps = 86; } def : InstRW<[ADLPWriteResGroup81], (instrs INSW)>; -def ADLPWriteResGroup82 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup82 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 4, 8, 6, 2, 5, 7, 5]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 42; @@ -1128,35 +1127,35 @@ def ADLPWriteResGroup86 : SchedWriteRes<[]> { def : InstRW<[ADLPWriteResGroup86], (instregex "^JMP_(1|4)$")>; def : InstRW<[ADLPWriteResGroup86], (instrs VZEROUPPER)>; -def ADLPWriteResGroup87 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup87 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [8, 2, 14, 3, 1]; let Latency = 198; let NumMicroOps = 81; } def : InstRW<[ADLPWriteResGroup87], (instrs LAR16rm)>; -def ADLPWriteResGroup88 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup88 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 66; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup88], (instrs LAR16rr)>; -def ADLPWriteResGroup89 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup89 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 85; } def : InstRW<[ADLPWriteResGroup89], (instrs LAR32rm)>; -def ADLPWriteResGroup90 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup90 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, 
ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 65; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup90], (instregex "^LAR(32|64)rr$")>; -def ADLPWriteResGroup91 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup91 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 87; @@ -1168,13 +1167,13 @@ def ADLPWriteResGroup92 : SchedWriteRes<[ADLPPort02_03]> { } def : InstRW<[ADLPWriteResGroup92], (instregex "^LD_F(32|64|80)m$")>; -def ADLPWriteResGroup93 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> { +def ADLPWriteResGroup93 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let Latency = 2; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup93], (instrs LEA16r)>; -def ADLPWriteResGroup94 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup94 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 6; let NumMicroOps = 4; @@ -1183,77 +1182,77 @@ def : InstRW<[ADLPWriteResGroup94], (instregex "^LODS(B|W)$", "^SCAS(B|L|Q|W)$")>; def : InstRW<[ADLPWriteResGroup94], (instrs LEAVE)>; -def ADLPWriteResGroup95 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup95 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup95], (instrs LEAVE64)>; -def ADLPWriteResGroup96 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup96 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, 
ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 3, 2, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[ADLPWriteResGroup96], (instrs LGDT64m)>; -def ADLPWriteResGroup97 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup97 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 1, 5, 3, 2, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[ADLPWriteResGroup97], (instrs LIDT64m)>; -def ADLPWriteResGroup98 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup98 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [5, 3, 2, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 12; } def : InstRW<[ADLPWriteResGroup98], (instrs LLDT16m)>; -def ADLPWriteResGroup99 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup99 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 4, 3, 1, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 11; } def : InstRW<[ADLPWriteResGroup99], (instrs LLDT16r)>; -def ADLPWriteResGroup100 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup100 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 8, 3, 1, 2, 7, 
2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 27; } def : InstRW<[ADLPWriteResGroup100], (instrs LMSW16m)>; -def ADLPWriteResGroup101 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup101 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 7, 1, 2, 5, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup101], (instrs LMSW16r)>; -def ADLPWriteResGroup102 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup102 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 5; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup102], (instregex "^LODS(L|Q)$")>; -def ADLPWriteResGroup103 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup103 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 4, 1]; let Latency = 3; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup103], (instrs LOOP)>; -def ADLPWriteResGroup104 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup104 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 3; let NumMicroOps = 11; } def : InstRW<[ADLPWriteResGroup104], (instrs LOOPE)>; -def ADLPWriteResGroup105 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup105 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 2; let NumMicroOps = 11; @@ -1266,21 +1265,21 @@ def ADLPWriteResGroup106 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort02_03, AD } def : InstRW<[ADLPWriteResGroup106], (instrs LRET64)>; -def ADLPWriteResGroup107 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup107 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 5, 3, 3, 1]; let Latency = 70; let NumMicroOps = 13; } def : InstRW<[ADLPWriteResGroup107], (instregex "^LSL(16|32|64)rm$")>; -def ADLPWriteResGroup108 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup108 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 4, 4, 3, 2, 1]; let Latency = 63; let NumMicroOps = 15; } def : InstRW<[ADLPWriteResGroup108], (instregex "^LSL(16|32|64)rr$")>; -def ADLPWriteResGroup109 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup109 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_10, ADLPPort05]> { let Latency = 24; let NumMicroOps = 3; } @@ -1304,7 +1303,7 @@ def ADLPWriteResGroup112 : SchedWriteRes<[ADLPPort00, ADLPPort00_01]> { } def : InstRW<[ADLPWriteResGroup112], (instrs MMX_CVTPI2PSrr)>; -def ADLPWriteResGroup113 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11]> { +def ADLPWriteResGroup113 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10]> { let Latency = 13; let NumMicroOps = 2; } @@ -1329,7 +1328,7 @@ def ADLPWriteResGroup116 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup116], (instrs MMX_MOVD64mr)>; -def ADLPWriteResGroup117 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup117 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 8; } def : InstRW<[ADLPWriteResGroup117], (instregex "^MMX_MOV(D|Q)64rm$", @@ -1351,7 +1350,7 @@ def ADLPWriteResGroup119 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05]> { } def : InstRW<[ADLPWriteResGroup119], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>; -def ADLPWriteResGroup120 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def 
ADLPWriteResGroup120 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 12; let NumMicroOps = 3; @@ -1368,13 +1367,13 @@ def : InstRW<[ADLPWriteResGroup121], (instregex "^MMX_PACKSS(DW|WB)rr$")>; def : InstRW<[ADLPWriteResGroup121], (instrs MMX_PACKUSWBrr)>; def : InstRW<[ADLPWriteResGroup121, ReadDefault, ReadInt2Fpu], (instrs MMX_PINSRWrri)>; -def ADLPWriteResGroup122 : SchedWriteRes<[ADLPPort00_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup122 : SchedWriteRes<[ADLPPort00_05, ADLPPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup122, ReadAfterVecLd], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rm$")>; -def ADLPWriteResGroup123 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup123 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 11; let NumMicroOps = 4; @@ -1388,7 +1387,7 @@ def ADLPWriteResGroup124 : SchedWriteRes<[ADLPPort00, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup124], (instregex "^MMX_PH(ADD|SUB)SWrr$")>; -def ADLPWriteResGroup125 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup125 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 9; let NumMicroOps = 2; } @@ -1396,7 +1395,7 @@ def : InstRW<[ADLPWriteResGroup125], (instregex "^VPBROADCAST(B|W)Yrm$")>; def : InstRW<[ADLPWriteResGroup125, ReadAfterLd], (instrs MMX_PINSRWrmi)>; def : InstRW<[ADLPWriteResGroup125, ReadAfterVecYLd], (instrs VPALIGNRYrmi)>; -def ADLPWriteResGroup126 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup126 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let Latency = 5; let NumMicroOps = 2; } @@ -1410,35 +1409,35 @@ def : InstRW<[ADLPWriteResGroup127], (instregex "^PUSH(F|G)S(16|32)$")>; def : InstRW<[ADLPWriteResGroup127], (instrs MOV16ms, MOVBE32mr)>; -def ADLPWriteResGroup128 : SchedWriteRes<[ADLPPort00_01_05_06_10, 
ADLPPort01]> { +def ADLPWriteResGroup128 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup128], (instregex "^MOV(16|32|64)rs$", "^S(TR|LDT)16r$")>; -def ADLPWriteResGroup129 : SchedWriteRes<[ADLPPort02_03_11]>; +def ADLPWriteResGroup129 : SchedWriteRes<[ADLPPort02_03_10]>; def : InstRW<[ADLPWriteResGroup129], (instregex "^MOV32ao(16|32|64)$")>; def : InstRW<[ADLPWriteResGroup129], (instrs MOV64ao64)>; -def ADLPWriteResGroup130 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup130 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup130], (instregex "^MOV(8|32)o(16|32)a$", "^MOV(8|32|64)o64a$")>; -def ADLPWriteResGroup131 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup131 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let Latency = 0; } def : InstRW<[ADLPWriteResGroup131], (instregex "^MOV32rr((_REV)?)$", "^MOVZX(32|64)rr8$")>; def : InstRW<[ADLPWriteResGroup131], (instrs MOVZX32rr8_NOREX)>; -def ADLPWriteResGroup132 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup132 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 5; } def : InstRW<[ADLPWriteResGroup132], (instrs MOV64ao32)>; -def ADLPWriteResGroup133 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup133 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2]; let Latency = 217; let NumMicroOps = 48; @@ -1451,20 +1450,20 @@ def ADLPWriteResGroup134 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup134], (instrs MOV64o32a)>; -def ADLPWriteResGroup135 : 
SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort05]> { +def ADLPWriteResGroup135 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort05]> { let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup135], (instrs MOV64rc)>; -def ADLPWriteResGroup136 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort05]> { +def ADLPWriteResGroup136 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort05]> { let ReleaseAtCycles = [3, 4, 8, 4, 2, 3]; let Latency = 181; let NumMicroOps = 24; } def : InstRW<[ADLPWriteResGroup136], (instrs MOV64rd)>; -def ADLPWriteResGroup137 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup137 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup137], (instregex "^MOV8ao(16|32|64)$")>; @@ -1482,13 +1481,13 @@ def ADLPWriteResGroup139 : SchedWriteRes<[ADLPPort00_06, ADLPPort04_09, ADLPPort } def : InstRW<[ADLPWriteResGroup139], (instrs MOVBE16mr)>; -def ADLPWriteResGroup140 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup140 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup140], (instrs MOVBE16rm)>; -def ADLPWriteResGroup141 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup141 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -1503,13 +1502,13 @@ def : InstRW<[ADLPWriteResGroup142], (instrs MOVBE64mr, SLDT16m, STRm)>; -def ADLPWriteResGroup143 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup143 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : 
InstRW<[ADLPWriteResGroup143], (instrs MOVBE64rm)>; -def ADLPWriteResGroup144 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup144 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup144], (instregex "^MOVDIR64B(16|32|64)$")>; @@ -1526,7 +1525,7 @@ def ADLPWriteResGroup146 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup146], (instrs MOVDIRI64)>; -def ADLPWriteResGroup147 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup147 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1545,7 +1544,7 @@ def ADLPWriteResGroup149 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup149], (instrs MOVNTImr)>; -def ADLPWriteResGroup150 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup150 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 8; let NumMicroOps = 7; @@ -1558,27 +1557,27 @@ def : InstRW<[ADLPWriteResGroup151], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$", "^VP(ADD|SUB)(B|D|Q|W)Yrr$")>; def : InstRW<[ADLPWriteResGroup151], (instrs VPBLENDDrri)>; -def ADLPWriteResGroup152 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup152 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 7; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup152], (instregex "^MOVS(L|Q|W)$")>; -def ADLPWriteResGroup153 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup153 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 6; } def : InstRW<[ADLPWriteResGroup153], (instregex "^MOVSX(16|32|64)rm(16|32)$", 
"^MOVSX(32|64)rm8$")>; def : InstRW<[ADLPWriteResGroup153], (instrs MOVSX32rm8_NOREX)>; -def ADLPWriteResGroup154 : SchedWriteRes<[ADLPPort01_05_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup154 : SchedWriteRes<[ADLPPort01_05_11, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup154], (instrs MOVSX16rm8)>; -def ADLPWriteResGroup155 : SchedWriteRes<[ADLPPort01_05_10]>; +def ADLPWriteResGroup155 : SchedWriteRes<[ADLPPort01_05_11]>; def : InstRW<[ADLPWriteResGroup155], (instregex "^MOVSX(16|32|64)rr(8|16|32)$")>; def : InstRW<[ADLPWriteResGroup155], (instrs MOVSX32rr8_NOREX)>; @@ -1607,70 +1606,70 @@ def ADLPWriteResGroup159 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort05, ADLPP } def : InstRW<[ADLPWriteResGroup159], (instrs MWAITrr)>; -def ADLPWriteResGroup160 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup160 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[ADLPWriteResGroup160], (instrs OUT16ir)>; -def ADLPWriteResGroup161 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup161 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 6, 27, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[ADLPWriteResGroup161], (instrs OUT16rr)>; -def ADLPWriteResGroup162 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def 
ADLPWriteResGroup162 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[ADLPWriteResGroup162], (instrs OUT32ir)>; -def ADLPWriteResGroup163 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup163 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 6, 29, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[ADLPWriteResGroup163], (instrs OUT32rr)>; -def ADLPWriteResGroup164 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup164 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[ADLPWriteResGroup164], (instrs OUT8ir)>; -def ADLPWriteResGroup165 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup165 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 5, 26, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[ADLPWriteResGroup165], (instrs OUT8rr)>; -def ADLPWriteResGroup166 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def 
ADLPWriteResGroup166 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 6, 25, 16, 7, 1, 17, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 80; } def : InstRW<[ADLPWriteResGroup166], (instrs OUTSB)>; -def ADLPWriteResGroup167 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup167 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 6, 28, 16, 10, 1, 20, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 89; } def : InstRW<[ADLPWriteResGroup167], (instrs OUTSL)>; -def ADLPWriteResGroup168 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup168 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 83; } def : InstRW<[ADLPWriteResGroup168], (instrs OUTSW)>; -def ADLPWriteResGroup169 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup169 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 10; let NumMicroOps = 2; } @@ -1685,14 +1684,14 @@ def : InstRW<[ADLPWriteResGroup170], (instregex "^(V?)PACK(S|U)S(DW|WB)rr$", "^VPACK(S|U)S(DW|WB)Yrr$")>; def : InstRW<[ADLPWriteResGroup170], (instrs VPCMPGTQYrr)>; -def ADLPWriteResGroup171 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup171 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } def : 
InstRW<[ADLPWriteResGroup171, ReadAfterVecXLd], (instregex "^(V?)P(ADD|SUB)(B|D|Q|W)rm$")>; def : InstRW<[ADLPWriteResGroup171, ReadAfterVecXLd], (instrs VPBLENDDrmi)>; -def ADLPWriteResGroup172 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup172 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 8; let NumMicroOps = 2; } @@ -1710,7 +1709,7 @@ def ADLPWriteResGroup174 : SchedWriteRes<[ADLPPort00_06, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup174], (instrs PAUSE)>; -def ADLPWriteResGroup175 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup175 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1722,7 +1721,7 @@ def ADLPWriteResGroup176 : SchedWriteRes<[ADLPPort01_05, ADLPPort04_09, ADLPPort } def : InstRW<[ADLPWriteResGroup176], (instregex "^(V?)PEXTR(D|Q)mri$")>; -def ADLPWriteResGroup177 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup177 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 2, 1]; let Latency = 9; let NumMicroOps = 4; @@ -1737,7 +1736,7 @@ def ADLPWriteResGroup178 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05]> { def : InstRW<[ADLPWriteResGroup178], (instregex "^(V?)PH(ADD|SUB)SWrr$", "^VPH(ADD|SUB)SWYrr$")>; -def ADLPWriteResGroup179 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup179 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 12; let NumMicroOps = 3; } @@ -1751,41 +1750,41 @@ def : InstRW<[ADLPWriteResGroup180], (instregex "^POPA(16|32)$", "^PREFETCHIT(0|1)$")>; def : InstRW<[ADLPWriteResGroup180], (instrs POPF32)>; -def ADLPWriteResGroup181 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup181 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [6, 2, 1, 1]; 
let Latency = 5; let NumMicroOps = 10; } def : InstRW<[ADLPWriteResGroup181], (instrs POPF16)>; -def ADLPWriteResGroup182 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup182 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 5; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup182], (instrs POPF64)>; -def ADLPWriteResGroup183 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup183 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 0; } def : InstRW<[ADLPWriteResGroup183], (instregex "^PREFETCHT(0|1|2)$")>; def : InstRW<[ADLPWriteResGroup183], (instrs PREFETCHNTA)>; -def ADLPWriteResGroup184 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort06]> { +def ADLPWriteResGroup184 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort06]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup184], (instregex "^PTWRITE((64)?)m$")>; -def ADLPWriteResGroup185 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort06]> { +def ADLPWriteResGroup185 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort06]> { let ReleaseAtCycles = [1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup185], (instrs PTWRITE64r)>; -def ADLPWriteResGroup186 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort06]> { +def ADLPWriteResGroup186 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort06]> { let ReleaseAtCycles = [2, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 4; @@ -1797,7 +1796,7 @@ def ADLPWriteResGroup187 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup187], (instregex "^PUSH64r((mr)?)$")>; -def ADLPWriteResGroup188 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup188 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let 
NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup188], (instrs PUSH64rmm)>; @@ -1818,49 +1817,49 @@ def ADLPWriteResGroup191 : SchedWriteRes<[ADLPPort01, ADLPPort04_09, ADLPPort07_ } def : InstRW<[ADLPWriteResGroup191], (instregex "^PUSH(F|G)S64$")>; -def ADLPWriteResGroup192 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup192 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 3, 2]; let Latency = 8; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup192], (instregex "^RC(L|R)(16|32|64)rCL$")>; -def ADLPWriteResGroup193 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup193 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup193, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>; -def ADLPWriteResGroup194 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup194 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [1, 5, 2]; let Latency = 20; let NumMicroOps = 8; } def : InstRW<[ADLPWriteResGroup194, WriteRMW], (instrs RCL8mCL)>; -def ADLPWriteResGroup195 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup195 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 5, 2]; let Latency = 7; let NumMicroOps = 9; } def : InstRW<[ADLPWriteResGroup195], (instrs RCL8rCL)>; -def ADLPWriteResGroup196 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup196 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 4, 3]; let Latency = 20; let NumMicroOps = 9; } def : InstRW<[ADLPWriteResGroup196, WriteRMW], (instrs RCR8mCL)>; -def ADLPWriteResGroup197 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, 
ADLPPort01]> { +def ADLPWriteResGroup197 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [3, 4, 3]; let Latency = 9; let NumMicroOps = 10; } def : InstRW<[ADLPWriteResGroup197], (instrs RCR8rCL)>; -def ADLPWriteResGroup198 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort01_05_10, ADLPPort05]> { +def ADLPWriteResGroup198 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort01_05_11, ADLPPort05]> { let ReleaseAtCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 54; @@ -1872,48 +1871,48 @@ def ADLPWriteResGroup199 : SchedWriteRes<[ADLPPort01]> { } def : InstRW<[ADLPWriteResGroup199], (instrs RDPID64)>; -def ADLPWriteResGroup200 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup200 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup200], (instrs RDPKRUr)>; -def ADLPWriteResGroup201 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> { +def ADLPWriteResGroup201 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort05]> { let ReleaseAtCycles = [9, 6, 2, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 18; } def : InstRW<[ADLPWriteResGroup201], (instrs RDPMC)>; -def ADLPWriteResGroup202 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup202 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = 1386; let NumMicroOps = 25; } def : 
InstRW<[ADLPWriteResGroup202], (instrs RDRAND16r)>; -def ADLPWriteResGroup203 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup203 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[ADLPWriteResGroup203], (instregex "^RDRAND(32|64)r$")>; -def ADLPWriteResGroup204 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup204 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let Latency = 1381; let NumMicroOps = 25; } def : InstRW<[ADLPWriteResGroup204], (instrs RDSEED16r)>; -def ADLPWriteResGroup205 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup205 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[ADLPWriteResGroup205], (instregex "^RDSEED(32|64)r$")>; -def ADLPWriteResGroup206 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> { +def ADLPWriteResGroup206 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort05]> { let ReleaseAtCycles = [5, 6, 3, 1]; let Latency = 18; let NumMicroOps = 15; @@ -1927,13 +1926,13 @@ def ADLPWriteResGroup207 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_ } def : InstRW<[ADLPWriteResGroup207], (instrs RDTSCP)>; -def ADLPWriteResGroup208 : 
SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup208 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup208], (instrs RET64)>; -def ADLPWriteResGroup209 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup209 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; @@ -1978,7 +1977,7 @@ def ADLPWriteResGroup215 : SchedWriteRes<[ADLPPort00_06]> { def : InstRW<[ADLPWriteResGroup215, WriteRMW], (instregex "^S(A|H)R8m(1|i)$", "^SHL8m(1|i)$")>; -def ADLPWriteResGroup216 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup216 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1991,7 +1990,7 @@ def ADLPWriteResGroup217 : SchedWriteRes<[ADLPPort00_06]> { def : InstRW<[ADLPWriteResGroup217], (instregex "^S(A|H)RX(32|64)rr$", "^SHLX(32|64)rr$")>; -def ADLPWriteResGroup218 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup218 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 7; @@ -2004,14 +2003,14 @@ def ADLPWriteResGroup219 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup219], (instrs SFENCE)>; -def ADLPWriteResGroup220 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup220 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 2, 2]; let Latency = 21; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup220], (instregex "^S(G|I)DT64m$")>; -def ADLPWriteResGroup221 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup221 
: SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10, ADLPPort05]> { let Latency = 9; let NumMicroOps = 3; } @@ -2023,7 +2022,7 @@ def ADLPWriteResGroup222 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup222], (instrs SHA1MSG1rr)>; -def ADLPWriteResGroup223 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup223 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 2, 1, 2, 1]; let Latency = 13; let NumMicroOps = 8; @@ -2037,7 +2036,7 @@ def ADLPWriteResGroup224 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPP } def : InstRW<[ADLPWriteResGroup224], (instrs SHA1MSG2rr)>; -def ADLPWriteResGroup225 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup225 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 4; } @@ -2049,7 +2048,7 @@ def ADLPWriteResGroup226 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPP } def : InstRW<[ADLPWriteResGroup226], (instrs SHA1NEXTErr)>; -def ADLPWriteResGroup227 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup227 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 13; let NumMicroOps = 2; } @@ -2062,7 +2061,7 @@ def ADLPWriteResGroup228 : SchedWriteRes<[ADLPPort05]> { def : InstRW<[ADLPWriteResGroup228], (instrs SHA1RNDS4rri, SHA256RNDS2rr)>; -def ADLPWriteResGroup229 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup229 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [3, 2, 1, 1, 1]; let Latency = 12; let NumMicroOps = 8; @@ -2076,7 +2075,7 @@ def ADLPWriteResGroup230 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPP } def : 
InstRW<[ADLPWriteResGroup230], (instrs SHA256MSG1rr)>; -def ADLPWriteResGroup231 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup231 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; let NumMicroOps = 3; @@ -2090,63 +2089,63 @@ def ADLPWriteResGroup232 : SchedWriteRes<[ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup232], (instrs SHA256MSG2rr)>; -def ADLPWriteResGroup233 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup233 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[ADLPWriteResGroup233], (instrs SHRD16mri8)>; -def ADLPWriteResGroup234 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> { +def ADLPWriteResGroup234 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup234], (instregex "^SLDT(32|64)r$")>; -def ADLPWriteResGroup235 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort05]> { +def ADLPWriteResGroup235 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort05]> { let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup235], (instrs SMSW16r)>; -def ADLPWriteResGroup236 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort05]> { +def ADLPWriteResGroup236 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort05]> { let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup236], (instregex "^SMSW(32|64)r$")>; -def ADLPWriteResGroup237 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11]> { +def ADLPWriteResGroup237 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10]> { let Latency = 24; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup237, ReadAfterVecLd], (instregex "^(V?)SQRTSDm_Int$")>; -def ADLPWriteResGroup238 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup238 : 
SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup238], (instrs STD)>; -def ADLPWriteResGroup239 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup239 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [1, 4, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup239], (instrs STI)>; -def ADLPWriteResGroup240 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup240 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 8; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup240], (instrs STOSB)>; -def ADLPWriteResGroup241 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup241 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 7; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup241], (instregex "^STOS(L|Q|W)$")>; -def ADLPWriteResGroup242 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> { +def ADLPWriteResGroup242 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let Latency = 5; let NumMicroOps = 2; } @@ -2158,7 +2157,7 @@ def ADLPWriteResGroup243 : SchedWriteRes<[ADLPPort00]> { def : InstRW<[ADLPWriteResGroup243], (instregex "^(TST|XAM)_F$")>; def : InstRW<[ADLPWriteResGroup243], (instrs UCOM_FPPr)>; -def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 9; let NumMicroOps = 4; @@ -2174,35 +2173,35 @@ def ADLPWriteResGroup245 : SchedWriteRes<[ADLPPort00_01_05]> { def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rrr$")>; def : InstRW<[ADLPWriteResGroup245], 
(instrs VPBLENDVBrrr)>; -def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [6, 7, 18]; let Latency = 81; let NumMicroOps = 31; } def : InstRW<[ADLPWriteResGroup246], (instrs VERRm)>; -def ADLPWriteResGroup247 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup247 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [6, 7, 17]; let Latency = 74; let NumMicroOps = 30; } def : InstRW<[ADLPWriteResGroup247], (instrs VERRr)>; -def ADLPWriteResGroup248 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup248 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 8, 21]; let Latency = 81; let NumMicroOps = 34; } def : InstRW<[ADLPWriteResGroup248], (instrs VERWm)>; -def ADLPWriteResGroup249 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup249 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 8, 20]; let Latency = 74; let NumMicroOps = 33; } def : InstRW<[ADLPWriteResGroup249], (instrs VERWr)>; -def ADLPWriteResGroup250 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup250 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 29; let NumMicroOps = 8; @@ -2212,7 +2211,7 @@ def : InstRW<[ADLPWriteResGroup250, WriteVecMaskedGatherWriteback], (instregex " def : InstRW<[ADLPWriteResGroup250, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSYrm, VPGATHERQDYrm)>; -def ADLPWriteResGroup251 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup251 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 
1, 2]; let Latency = 20; let NumMicroOps = 5; @@ -2222,7 +2221,7 @@ def : InstRW<[ADLPWriteResGroup251, WriteVecMaskedGatherWriteback], (instregex " def : InstRW<[ADLPWriteResGroup251, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSrm, VPGATHERQDrm)>; -def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 8]; let Latency = 30; let NumMicroOps = 12; @@ -2230,7 +2229,7 @@ def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort def : InstRW<[ADLPWriteResGroup252, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; -def ADLPWriteResGroup253 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup253 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 28; let NumMicroOps = 8; @@ -2245,14 +2244,14 @@ def ADLPWriteResGroup254 : SchedWriteRes<[ADLPPort01_05, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup254], (instregex "^VH(ADD|SUB)P(D|S)rr$")>; -def ADLPWriteResGroup255 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup255 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup255, ReadAfterVecYLd], (instregex "^VINSERT(F|I)128rmi$", "^VP(ADD|SUB)(B|D|Q|W)Yrm$")>; -def ADLPWriteResGroup256 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup256 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } @@ -2294,7 +2293,7 @@ def ADLPWriteResGroup262 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup262], (instrs VMOVNTPSmr)>; -def ADLPWriteResGroup263 : SchedWriteRes<[ADLPPort02_03_11, 
ADLPPort05]> { +def ADLPWriteResGroup263 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 11; let NumMicroOps = 2; } @@ -2302,21 +2301,21 @@ def : InstRW<[ADLPWriteResGroup263, ReadAfterVecYLd], (instregex "^VPACK(S|U)S(D def : InstRW<[ADLPWriteResGroup263, ReadAfterVecYLd], (instrs VPCMPGTQYrm)>; def : InstRW<[ADLPWriteResGroup263, ReadAfterVecXLd], (instrs VPCLMULQDQYrmi)>; -def ADLPWriteResGroup264 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup264 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instregex "^VSHUFP(D|S)Yrmi$")>; def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instrs VPBLENDWYrmi)>; -def ADLPWriteResGroup266 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup266 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 2, 1]; let Latency = 10; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup266, ReadAfterVecYLd], (instregex "^VPH(ADD|SUB)SWYrm$")>; -def ADLPWriteResGroup267 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10]> { +def ADLPWriteResGroup267 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11]> { let ReleaseAtCycles = [1, 2, 3, 3, 1]; let Latency = 16; let NumMicroOps = 10; @@ -2337,42 +2336,42 @@ def ADLPWriteResGroup269 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_ } def : InstRW<[ADLPWriteResGroup269], (instrs WRMSR)>; -def ADLPWriteResGroup270 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> { +def ADLPWriteResGroup270 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort05]> { let ReleaseAtCycles = [2, 1, 4, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 8; } def : InstRW<[ADLPWriteResGroup270], (instrs WRPKRUr)>; -def 
ADLPWriteResGroup271 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup271 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 12; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup271, WriteRMW], (instregex "^XADD(16|32|64)rm$")>; -def ADLPWriteResGroup272 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup272 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 13; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup272, WriteRMW], (instrs XADD8rm)>; -def ADLPWriteResGroup273 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup273 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 39; let NumMicroOps = 5; } def : InstRW<[ADLPWriteResGroup273, WriteRMW], (instregex "^XCHG(16|32)rm$")>; -def ADLPWriteResGroup274 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup274 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [5, 1]; let Latency = 39; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup274, WriteRMW], (instrs XCHG64rm)>; -def ADLPWriteResGroup275 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup275 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 40; let NumMicroOps = 5; @@ -2386,14 +2385,14 @@ def ADLPWriteResGroup276 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPP } def : InstRW<[ADLPWriteResGroup276], (instrs XCH_F)>; -def ADLPWriteResGroup277 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup277 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [7, 3, 8, 5]; let Latency = 4; let NumMicroOps = 23; } def : InstRW<[ADLPWriteResGroup277], (instrs XGETBV)>; -def ADLPWriteResGroup278 : 
SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup278 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 7; let NumMicroOps = 3; @@ -2408,63 +2407,63 @@ def ADLPWriteResGroup279 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort01, ADLPP def : InstRW<[ADLPWriteResGroup279], (instregex "^XRSTOR((S|64)?)$")>; def : InstRW<[ADLPWriteResGroup279], (instrs XRSTORS64)>; -def ADLPWriteResGroup280 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup280 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 42; let NumMicroOps = 140; } def : InstRW<[ADLPWriteResGroup280], (instrs XSAVE)>; -def ADLPWriteResGroup281 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup281 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 41; let NumMicroOps = 140; } def : InstRW<[ADLPWriteResGroup281], (instrs XSAVE64)>; -def ADLPWriteResGroup282 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup282 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 151; } def : InstRW<[ADLPWriteResGroup282], (instrs XSAVEC)>; -def ADLPWriteResGroup283 
: SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup283 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 152; } def : InstRW<[ADLPWriteResGroup283], (instrs XSAVEC64)>; -def ADLPWriteResGroup284 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup284 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [25, 35, 52, 27, 4, 1, 10, 1]; let Latency = 46; let NumMicroOps = 155; } def : InstRW<[ADLPWriteResGroup284], (instrs XSAVEOPT)>; -def ADLPWriteResGroup285 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup285 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [25, 35, 53, 27, 4, 1, 10, 1]; let Latency = 46; let NumMicroOps = 156; } def : InstRW<[ADLPWriteResGroup285], (instrs XSAVEOPT64)>; -def ADLPWriteResGroup286 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup286 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2]; let Latency = 42; let NumMicroOps = 184; } def : InstRW<[ADLPWriteResGroup286], (instrs XSAVES)>; -def ADLPWriteResGroup287 : 
SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup287 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2]; let Latency = 42; let NumMicroOps = 186; } def : InstRW<[ADLPWriteResGroup287], (instrs XSAVES64)>; -def ADLPWriteResGroup288 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort05]> { +def ADLPWriteResGroup288 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort05]> { let ReleaseAtCycles = [4, 23, 2, 14, 8, 1, 2]; let Latency = 5; let NumMicroOps = 54; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 699ca91cd1f8f..5b50e1943e3db 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -367,32 +367,37 @@ defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm 
: X86WriteRes; defm : X86WriteResUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResUnsupported; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b820418bb5519..d06e8a9937097 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -364,33 +364,41 @@ defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; @@ -983,7 +991,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } -def: 
InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>; def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { @@ -1349,13 +1356,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>; -def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ReleaseAtCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>; - def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 7be9f51bcd46b..775ad6b1078a5 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -348,28 +348,33 @@ defm : X86WriteRes defm : X86WriteRes; defm : X86WriteRes; // Unsupported = 1 -defm : SBWriteResPair; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // Unsupported = 1 -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; // Unsupported = 1 - -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +// F16C Instructions (IvyBridge+) +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; 
// Unsupported = 1 // Vector integer operations. defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 8a23d1b103aa6..e04ff68d278b2 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -56,15 +56,15 @@ def SPRPort00_05 : ProcResGroup<[SPRPort00, SPRPort05]>; def SPRPort00_05_06 : ProcResGroup<[SPRPort00, SPRPort05, SPRPort06]>; def SPRPort00_06 : ProcResGroup<[SPRPort00, SPRPort06]>; def SPRPort01_05 : ProcResGroup<[SPRPort01, SPRPort05]>; -def SPRPort01_05_10 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>; +def SPRPort01_05_11 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort11]>; def SPRPort02_03 : ProcResGroup<[SPRPort02, SPRPort03]>; -def SPRPort02_03_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>; +def SPRPort02_03_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort10]>; def SPRPort05_11 : ProcResGroup<[SPRPort05, SPRPort11]>; def SPRPort07_08 : ProcResGroup<[SPRPort07, SPRPort08]>; // EU has 112 reservation stations. -def SPRPort00_01_05_06_10 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05, - SPRPort06, SPRPort10]> { +def SPRPort00_01_05_06_11 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05, + SPRPort06, SPRPort11]> { let BufferSize = 112; } @@ -74,8 +74,8 @@ def SPRPort04_09 : ProcResGroup<[SPRPort04, SPRPort09]> { } // MEM has 72 reservation stations. 
-def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07, - SPRPort08, SPRPort11]> { +def SPRPort02_03_07_08_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07, + SPRPort08, SPRPort10]> { let BufferSize = 72; } @@ -113,7 +113,7 @@ multiclass SPRWriteResPair { + def : WriteRes { let Latency = !add(Lat, LoadLat); let ReleaseAtCycles = !listconcat([1], Res); let NumMicroOps = !add(UOps, LoadUOps); @@ -126,71 +126,71 @@ multiclass SPRWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes; +def : WriteRes { let Latency = 11; } defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; def : WriteRes; defm : X86WriteRes; defm : SPRWriteResPair; def : WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; def : WriteRes { let Latency = 11; } -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm 
: X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -202,12 +202,12 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -235,7 +235,7 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -251,13 +251,13 @@ defm : SPRWriteResPair; def : WriteRes; defm : X86WriteRes; defm : X86WriteRes; -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } defm : SPRWriteResPair; @@ -270,8 +270,8 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -334,15 +334,15 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 2; } -defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; defm : X86WriteRes; -defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -359,10 +359,10 @@ defm : SPRWriteResPair; def : WriteRes { let 
Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; defm : SPRWriteResPair; -def : WriteRes { +def : WriteRes { let Latency = 5; } def : WriteRes { @@ -370,7 +370,7 @@ def : WriteRes { } defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; def : WriteRes { let Latency = SapphireRapidsModel.MaxLatency; @@ -380,9 +380,9 @@ def : WriteRes { } defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -397,16 +397,16 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes { let Latency = 3; } @@ -434,7 +434,7 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -447,7 +447,7 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : SPRWriteResPair; @@ -455,20 +455,20 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } 
defm : SPRWriteResPair; @@ -482,8 +482,8 @@ def : WriteRes { let Latency = 4; } defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -504,11 +504,11 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -516,26 +516,26 @@ defm : X86WriteRes; defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; // Infered SchedWriteRes and InstRW definition. -def SPRWriteResGroup0 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09]> { +def SPRWriteResGroup0 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_10, SPRPort04, SPRPort04_09]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup0], (instregex "^AA(D|N)D64mr$", "^A(X?)OR64mr$")>; -def SPRWriteResGroup1 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup1 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 12; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup1, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(16|32|64)mr$")>; -def SPRWriteResGroup2 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup2 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -543,20 +543,20 @@ def : InstRW<[SPRWriteResGroup2], (instregex "^RORX(32|64)mi$")>; def : InstRW<[SPRWriteResGroup2, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, 
ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(8|16|32|64)rm$", "^AD(C|O)X(32|64)rm$")>; -def SPRWriteResGroup3 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup3 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>; -def SPRWriteResGroup4 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup4 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup4, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)8mr$")>; -def SPRWriteResGroup5 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup5 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -585,7 +585,7 @@ def : InstRW<[SPRWriteResGroup6], (instregex "^(ADD|SUB)64ri8$", def : InstRW<[SPRWriteResGroup6], (instrs CLC, JMP_2)>; -def SPRWriteResGroup7 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup7 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 13; let NumMicroOps = 4; } @@ -598,7 +598,7 @@ def : InstRW<[SPRWriteResGroup7, ReadAfterLd, ReadDefault, ReadDefault, ReadDefa "^(X?)OR8mr$")>; def : InstRW<[SPRWriteResGroup7, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs SUB8mr)>; -def SPRWriteResGroup8 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup8 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 10; let NumMicroOps = 2; } @@ -620,7 
+620,7 @@ def : InstRW<[SPRWriteResGroup9], (instregex "^(V?)(ADD|SUB)PSrr$", "^VPUNPCK(H|L)(BW|WD)Z(128|256)rrk(z?)$")>; def : InstRW<[SPRWriteResGroup9], (instrs VADDSUBPSYrr)>; -def SPRWriteResGroup10 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup10 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 10; let NumMicroOps = 2; } @@ -646,7 +646,7 @@ def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instregex "^VFPCLASSP(D|H|S "^VPERM(I|T)2PDZ128rmbkz$")>; def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instrs VPERMBZ128rm)>; -def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; let NumMicroOps = 3; @@ -695,7 +695,7 @@ def : InstRW<[SPRWriteResGroup12], (instrs ADD_FST0r, VPCMPGTQYrr, VPERMDYrr)>; -def SPRWriteResGroup13 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup13 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let Latency = 2; } def : InstRW<[SPRWriteResGroup13], (instregex "^AND(8|16|32|64)r(r|i8)$", @@ -713,7 +713,7 @@ def : InstRW<[SPRWriteResGroup13], (instregex "^AND(8|16|32|64)r(r|i8)$", "^TEST(8|16|32|64)rr$")>; def : InstRW<[SPRWriteResGroup13], (instrs XOR8rr_NOREX)>; -def SPRWriteResGroup14 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup14 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } @@ -723,18 +723,18 @@ def : InstRW<[SPRWriteResGroup14, ReadAfterLd], (instregex "^(X?)OR64rm$")>; def : InstRW<[SPRWriteResGroup14, ReadAfterLd], (instrs AND64rm)>; def : InstRW<[SPRWriteResGroup14, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^TEST(8|16|32|64)mr$")>; -def SPRWriteResGroup15 : SchedWriteRes<[SPRPort01_05_10, SPRPort02_03_11]> { +def SPRWriteResGroup15 : SchedWriteRes<[SPRPort01_05_11, SPRPort02_03_10]> { let Latency = 7; let 
NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup15, ReadAfterLd], (instregex "^ANDN(32|64)rm$")>; -def SPRWriteResGroup16 : SchedWriteRes<[SPRPort01_05_10]> { +def SPRWriteResGroup16 : SchedWriteRes<[SPRPort01_05_11]> { let Latency = 2; } def : InstRW<[SPRWriteResGroup16], (instregex "^ANDN(32|64)rr$")>; -def SPRWriteResGroup17 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup17 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [5, 2, 1, 1]; let Latency = 10; let NumMicroOps = 9; @@ -747,14 +747,14 @@ def SPRWriteResGroup18 : SchedWriteRes<[SPRPort01]> { def : InstRW<[SPRWriteResGroup18], (instregex "^BT((C|R|S)?)64rr$", "^P(DEP|EXT)(32|64)rr$")>; -def SPRWriteResGroup19 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup19 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 2, 1, 1, 1, 1]; let Latency = 17; let NumMicroOps = 10; } def : InstRW<[SPRWriteResGroup19], (instregex "^BT(C|R|S)64mr$")>; -def SPRWriteResGroup20 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup20 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 7; let NumMicroOps = 5; } @@ -789,25 +789,25 @@ def SPRWriteResGroup24 : SchedWriteRes<[SPRPort00_06]>; def : InstRW<[SPRWriteResGroup24], (instregex "^C(DQ|QO)$", "^(CL|ST)AC$")>; -def SPRWriteResGroup25 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup25 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let Latency = 3; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup25], (instrs CLD)>; -def SPRWriteResGroup26 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def 
SPRWriteResGroup26 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let Latency = 3; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup26], (instrs CLDEMOTE)>; -def SPRWriteResGroup27 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup27 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort04_09, SPRPort07_08]> { let Latency = 2; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup27], (instrs CLFLUSH)>; -def SPRWriteResGroup28 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup28 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let Latency = 2; let NumMicroOps = 3; } @@ -827,35 +827,35 @@ def SPRWriteResGroup30 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup30], (instrs CLTS)>; -def SPRWriteResGroup31 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup31 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let Latency = 5; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup31], (instregex "^MOV16o(16|32|64)a$")>; def : InstRW<[SPRWriteResGroup31], (instrs CLWB)>; -def SPRWriteResGroup32 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup32 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [5, 2]; let Latency = 6; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup32], (instregex "^CMPS(B|L|Q|W)$")>; -def SPRWriteResGroup33 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup33 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [2, 7, 6, 2, 1, 1, 2, 1]; let Latency = 32; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup33], (instrs 
CMPXCHG16B)>; -def SPRWriteResGroup34 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup34 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 7, 2, 1, 1, 1]; let Latency = 25; let NumMicroOps = 16; } def : InstRW<[SPRWriteResGroup34], (instrs CMPXCHG8B)>; -def SPRWriteResGroup35 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup35 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; @@ -869,7 +869,7 @@ def SPRWriteResGroup36 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_06, S } def : InstRW<[SPRWriteResGroup36], (instrs CPUID)>; -def SPRWriteResGroup37 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup37 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 12; let NumMicroOps = 3; } @@ -884,7 +884,7 @@ def : InstRW<[SPRWriteResGroup37, ReadAfterVecLd], (instregex "^(V?)CVTSI642SSrm "^VCVT(U?)SI642SSZrm((_Int)?)$")>; def : InstRW<[SPRWriteResGroup37, ReadAfterVecLd], (instrs VCVTSI642SSrm)>; -def SPRWriteResGroup38 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup38 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_10]> { let Latency = 26; let NumMicroOps = 3; } @@ -926,7 +926,7 @@ def : InstRW<[SPRWriteResGroup41], (instregex "^(V?)CVT(T?)SS2SI64rr_Int$", def : InstRW<[SPRWriteResGroup41], (instrs VCVTTSS2USI64Zrr)>; def : InstRW<[SPRWriteResGroup41, ReadDefault], (instregex "^(V?)CVT(T?)SS2SI64rr$")>; -def SPRWriteResGroup42 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup42 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let Latency = 2; let NumMicroOps = 2; } 
@@ -942,18 +942,18 @@ def : InstRW<[SPRWriteResGroup43], (instrs DEC16r_alt, ST_FPrr, SYSCALL)>; -def SPRWriteResGroup44 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup44 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 7; } def : InstRW<[SPRWriteResGroup44], (instrs DEC32r_alt)>; -def SPRWriteResGroup45 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup45 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 27; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup45], (instregex "^DIVR_F(32|64)m$")>; -def SPRWriteResGroup46 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup46 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 30; let NumMicroOps = 3; } @@ -965,14 +965,14 @@ def SPRWriteResGroup47 : SchedWriteRes<[SPRPort00]> { def : InstRW<[SPRWriteResGroup47], (instregex "^DIVR_F(P?)rST0$")>; def : InstRW<[SPRWriteResGroup47], (instrs DIVR_FST0r)>; -def SPRWriteResGroup48 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup48 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 19; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup48, ReadAfterVecLd], (instregex "^(V?)DIVSDrm$")>; def : InstRW<[SPRWriteResGroup48, ReadAfterVecLd], (instrs VDIVSDZrm)>; -def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 22; let NumMicroOps = 2; } @@ -980,7 +980,7 @@ def : InstRW<[SPRWriteResGroup49], (instregex "^DIV_F(32|64)m$")>; def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm_Int((k|kz)?)$")>; def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instrs VSQRTSHZm)>; -def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 25; 
let NumMicroOps = 3; } @@ -998,7 +998,7 @@ def : InstRW<[SPRWriteResGroup52], (instregex "^ENQCMD(S?)(16|32|64)$", "^ST_F(32|64)m$")>; def : InstRW<[SPRWriteResGroup52], (instrs PUSHF32)>; -def SPRWriteResGroup53 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup53 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [2, 21, 2, 14, 4, 9, 5]; let Latency = 126; let NumMicroOps = 57; @@ -1023,13 +1023,13 @@ def : InstRW<[SPRWriteResGroup55], (instrs MMX_PEXTRWrri, VEXTRACTPSZrri, VPERMWZrr)>; -def SPRWriteResGroup56 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort06]> { +def SPRWriteResGroup56 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_10, SPRPort04, SPRPort04_09, SPRPort06]> { let Latency = 7; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup56], (instrs FARCALL64m)>; -def SPRWriteResGroup57 : SchedWriteRes<[SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup57 : SchedWriteRes<[SPRPort02_03_10, SPRPort06]> { let Latency = 6; let NumMicroOps = 2; } @@ -1051,7 +1051,7 @@ def SPRWriteResGroup59 : SchedWriteRes<[SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup59], (instrs FDECSTP)>; -def SPRWriteResGroup60 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup60 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 11; let NumMicroOps = 3; @@ -1069,13 +1069,13 @@ def : InstRW<[SPRWriteResGroup61], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rr$", def : InstRW<[SPRWriteResGroup61], (instrs FINCSTP, FNOP)>; -def SPRWriteResGroup62 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup62 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup62], (instrs FLDCW16m)>; -def SPRWriteResGroup63 : SchedWriteRes<[SPRPort00, 
SPRPort00_05, SPRPort00_06, SPRPort02_03, SPRPort02_03_11]> { +def SPRWriteResGroup63 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 5, 10, 39, 8]; let Latency = 62; let NumMicroOps = 64; @@ -1121,28 +1121,28 @@ def SPRWriteResGroup69 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort00 } def : InstRW<[SPRWriteResGroup69], (instrs FSTENVm)>; -def SPRWriteResGroup70 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup70 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [4, 1, 2, 1, 47, 33, 2]; let Latency = 63; let NumMicroOps = 90; } def : InstRW<[SPRWriteResGroup70], (instrs FXRSTOR)>; -def SPRWriteResGroup71 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup71 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [4, 1, 2, 1, 45, 31, 4]; let Latency = 63; let NumMicroOps = 88; } def : InstRW<[SPRWriteResGroup71], (instrs FXRSTOR64)>; -def SPRWriteResGroup72 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup72 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [2, 5, 10, 10, 2, 38, 5, 38]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 110; } def : InstRW<[SPRWriteResGroup72], (instregex "^FXSAVE((64)?)$")>; -def SPRWriteResGroup73 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup73 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 2; } @@ -1212,41 +1212,41 @@ def : 
InstRW<[SPRWriteResGroup74], (instregex "^(V?)GF2P8MULBrr$", def : InstRW<[SPRWriteResGroup74], (instrs VCVTSH2SSZrr, VGF2P8MULBYrr)>; -def SPRWriteResGroup75 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup75 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 5, 26, 19, 2, 7, 21]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup75], (instrs IN16ri)>; -def SPRWriteResGroup76 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup76 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 1, 4, 26, 19, 3, 7, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup76], (instrs IN16rr)>; -def SPRWriteResGroup77 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup77 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 6, 28, 21, 2, 10, 20]; let Latency = 35; let NumMicroOps = 94; } def : InstRW<[SPRWriteResGroup77], (instrs IN32ri)>; -def SPRWriteResGroup78 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup78 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 9, 28, 21, 2, 11, 21]; let NumMicroOps = 99; } def : InstRW<[SPRWriteResGroup78], (instrs IN32rr)>; -def SPRWriteResGroup79 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, 
SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup79 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 8, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup79], (instrs IN8ri)>; -def SPRWriteResGroup80 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup80 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 7, 20]; let Latency = 35; let NumMicroOps = 86; @@ -1258,7 +1258,7 @@ def SPRWriteResGroup81 : SchedWriteRes<[SPRPort00_06]> { } def : InstRW<[SPRWriteResGroup81], (instrs INC16r_alt)>; -def SPRWriteResGroup82 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup82 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 7; } def : InstRW<[SPRWriteResGroup82], (instregex "^LD_F(32|64|80)m$", @@ -1269,28 +1269,28 @@ def : InstRW<[SPRWriteResGroup82], (instregex "^LD_F(32|64|80)m$", def : InstRW<[SPRWriteResGroup82], (instrs INC32r_alt, VBROADCASTI32X2Z128rm)>; -def SPRWriteResGroup83 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup83 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 6, 24, 17, 8, 1, 19, 1]; let Latency = 20; let NumMicroOps = 83; } def : InstRW<[SPRWriteResGroup83], (instrs INSB)>; -def SPRWriteResGroup84 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup84 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_11, 
SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1]; let Latency = 20; let NumMicroOps = 92; } def : InstRW<[SPRWriteResGroup84], (instrs INSL)>; -def SPRWriteResGroup85 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup85 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1]; let Latency = 20; let NumMicroOps = 86; } def : InstRW<[SPRWriteResGroup85], (instrs INSW)>; -def SPRWriteResGroup86 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup86 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 4, 8, 6, 2, 5, 7, 5]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 42; @@ -1350,7 +1350,7 @@ def : InstRW<[SPRWriteResGroup92], (instregex "^KAND(B|D|Q|W|ND|NQ|NW)kk$", def : InstRW<[SPRWriteResGroup92], (instrs KANDNBkk, VPSUBUSBZrr)>; -def SPRWriteResGroup93 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup93 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 7; let NumMicroOps = 2; } @@ -1397,48 +1397,48 @@ def : InstRW<[SPRWriteResGroup96], (instregex "^K((OR)?)TEST(B|D|Q|W)kk$", "^VPSUBUS(B|W)Zrrk(z?)$")>; def : InstRW<[SPRWriteResGroup96], (instrs VMOVSDto64Zrr)>; -def SPRWriteResGroup97 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup97 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_10, 
SPRPort05]> { let ReleaseAtCycles = [8, 2, 14, 3, 1]; let Latency = 198; let NumMicroOps = 81; } def : InstRW<[SPRWriteResGroup97], (instrs LAR16rm)>; -def SPRWriteResGroup98 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup98 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 66; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup98], (instrs LAR16rr)>; -def SPRWriteResGroup99 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup99 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 85; } def : InstRW<[SPRWriteResGroup99], (instrs LAR32rm)>; -def SPRWriteResGroup100 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup100 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 65; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup100], (instregex "^LAR(32|64)rr$")>; -def SPRWriteResGroup101 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup101 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup101], (instrs LAR64rm)>; -def SPRWriteResGroup102 : 
SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup102 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let Latency = 2; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup102], (instrs LEA16r)>; -def SPRWriteResGroup103 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup103 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 6; let NumMicroOps = 4; @@ -1447,104 +1447,104 @@ def : InstRW<[SPRWriteResGroup103], (instregex "^LODS(B|W)$", "^SCAS(B|L|Q|W)$")>; def : InstRW<[SPRWriteResGroup103], (instrs LEAVE)>; -def SPRWriteResGroup104 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup104 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup104], (instrs LEAVE64)>; -def SPRWriteResGroup105 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup105 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 3, 2, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[SPRWriteResGroup105], (instrs LGDT64m)>; -def SPRWriteResGroup106 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup106 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 5, 3, 2, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[SPRWriteResGroup106], (instrs LIDT64m)>; -def SPRWriteResGroup107 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def 
SPRWriteResGroup107 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [5, 3, 2, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 12; } def : InstRW<[SPRWriteResGroup107], (instrs LLDT16m)>; -def SPRWriteResGroup108 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup108 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 4, 3, 1, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 11; } def : InstRW<[SPRWriteResGroup108], (instrs LLDT16r)>; -def SPRWriteResGroup109 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup109 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 27; } def : InstRW<[SPRWriteResGroup109], (instrs LMSW16m)>; -def SPRWriteResGroup110 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup110 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 7, 1, 2, 5, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup110], (instrs LMSW16r)>; -def SPRWriteResGroup111 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup111 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 5; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup111], (instregex "^LODS(L|Q)$")>; -def 
SPRWriteResGroup112 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup112 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 4, 1]; let Latency = 3; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup112], (instrs LOOP)>; -def SPRWriteResGroup113 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup113 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 3; let NumMicroOps = 11; } def : InstRW<[SPRWriteResGroup113], (instrs LOOPE)>; -def SPRWriteResGroup114 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup114 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 2; let NumMicroOps = 11; } def : InstRW<[SPRWriteResGroup114], (instrs LOOPNE)>; -def SPRWriteResGroup115 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup115 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup115], (instrs LRET64)>; -def SPRWriteResGroup116 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup116 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 5, 3, 3, 1]; let Latency = 70; let NumMicroOps = 13; } def : InstRW<[SPRWriteResGroup116], (instregex "^LSL(16|32|64)rm$")>; -def SPRWriteResGroup117 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup117 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 4, 4, 3, 2, 1]; let Latency = 63; let NumMicroOps = 15; } def : InstRW<[SPRWriteResGroup117], (instregex "^LSL(16|32|64)rr$")>; -def 
SPRWriteResGroup118 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup118 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 24; let NumMicroOps = 3; } @@ -1574,7 +1574,7 @@ def SPRWriteResGroup121 : SchedWriteRes<[SPRPort00, SPRPort00_01]> { } def : InstRW<[SPRWriteResGroup121], (instrs MMX_CVTPI2PSrr)>; -def SPRWriteResGroup122 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup122 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 13; let NumMicroOps = 2; } @@ -1600,7 +1600,7 @@ def SPRWriteResGroup125 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { def : InstRW<[SPRWriteResGroup125], (instregex "^VMOV(W|SHZ)mr$")>; def : InstRW<[SPRWriteResGroup125], (instrs MMX_MOVD64mr)>; -def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 8; } def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$", @@ -1631,7 +1631,7 @@ def SPRWriteResGroup128 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup128], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>; -def SPRWriteResGroup129 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup129 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 12; let NumMicroOps = 3; @@ -1653,7 +1653,7 @@ def : InstRW<[SPRWriteResGroup130], (instregex "^MMX_PACKSS(DW|WB)rr$", def : InstRW<[SPRWriteResGroup130], (instrs MMX_PACKUSWBrr)>; def : InstRW<[SPRWriteResGroup130, ReadDefault, ReadInt2Fpu], (instrs MMX_PINSRWrri)>; -def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -1675,7 +1675,7 @@ def : InstRW<[SPRWriteResGroup131, ReadAfterVecYLd], (instregex "^VINSERT(F|I)(3 "^VPTERNLOG(D|Q)Zrmbik(z?)$", "^VPTERNLOG(D|Q)Zrmi((kz)?)$")>; -def SPRWriteResGroup132 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup132 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 11; let NumMicroOps = 4; @@ -1689,7 +1689,7 @@ def SPRWriteResGroup133 : SchedWriteRes<[SPRPort00, SPRPort05]> { } def : InstRW<[SPRWriteResGroup133], (instregex "^MMX_PH(ADD|SUB)SWrr$")>; -def SPRWriteResGroup134 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup134 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 9; let NumMicroOps = 2; } @@ -1702,7 +1702,7 @@ def : InstRW<[SPRWriteResGroup134, ReadAfterVecLd], (instregex "^VFPCLASSS(D|H|S def : InstRW<[SPRWriteResGroup134, ReadAfterVecYLd], (instregex "^VPALIGNR(Y|Z256)rmi$")>; def : InstRW<[SPRWriteResGroup134, ReadAfterVecYLd], (instrs VPSHUFBZrm)>; -def SPRWriteResGroup135 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup135 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let Latency = 5; let NumMicroOps = 2; } @@ -1716,42 +1716,42 @@ def : InstRW<[SPRWriteResGroup136], (instregex "^PUSH(F|G)S(16|32)$")>; def : InstRW<[SPRWriteResGroup136], (instrs MOV16ms, MOVBE32mr)>; -def SPRWriteResGroup137 : SchedWriteRes<[SPRPort00_01_05_06_10]>; +def SPRWriteResGroup137 : SchedWriteRes<[SPRPort00_01_05_06_11]>; def : InstRW<[SPRWriteResGroup137], (instregex "^MOV(8|16|32|64)ri$", "^MOV(8|16|32)ri_alt$", "^MOV(8|16)rr((_REV)?)$")>; def : InstRW<[SPRWriteResGroup137], (instrs MOV64ri32, MOV8rr_NOREX)>; -def SPRWriteResGroup138 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup138 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup138], (instregex "^MOV(16|32|64)rs$", "^S(TR|LDT)16r$")>; -def SPRWriteResGroup139 : SchedWriteRes<[SPRPort02_03_11]>; +def SPRWriteResGroup139 : SchedWriteRes<[SPRPort02_03_10]>; def : InstRW<[SPRWriteResGroup139], (instregex "^MOV32ao(16|32|64)$")>; def : 
InstRW<[SPRWriteResGroup139], (instrs MOV64ao64)>; -def SPRWriteResGroup140 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup140 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup140], (instregex "^MOV(8|32)o(16|32)a$", "^MOV(8|32|64)o64a$")>; -def SPRWriteResGroup141 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup141 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let Latency = 0; } def : InstRW<[SPRWriteResGroup141], (instregex "^MOV32rr((_REV)?)$", "^MOVZX(32|64)rr8$")>; def : InstRW<[SPRWriteResGroup141], (instrs MOVZX32rr8_NOREX)>; -def SPRWriteResGroup142 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup142 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 5; } def : InstRW<[SPRWriteResGroup142], (instrs MOV64ao32)>; -def SPRWriteResGroup143 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup143 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2]; let Latency = 217; let NumMicroOps = 48; @@ -1764,20 +1764,20 @@ def SPRWriteResGroup144 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup144], (instrs MOV64o32a)>; -def SPRWriteResGroup145 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort05]> { +def SPRWriteResGroup145 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort05]> { let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup145], (instrs MOV64rc)>; -def SPRWriteResGroup146 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> { +def SPRWriteResGroup146 : SchedWriteRes<[SPRPort00_01_05, 
SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort05]> { let ReleaseAtCycles = [3, 4, 8, 4, 2, 3]; let Latency = 181; let NumMicroOps = 24; } def : InstRW<[SPRWriteResGroup146], (instrs MOV64rd)>; -def SPRWriteResGroup147 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup147 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup147], (instregex "^MOV8ao(16|32|64)$")>; @@ -1788,13 +1788,13 @@ def SPRWriteResGroup148 : SchedWriteRes<[SPRPort00_06, SPRPort04_09, SPRPort07_0 } def : InstRW<[SPRWriteResGroup148], (instrs MOVBE16mr)>; -def SPRWriteResGroup149 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup149 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup149], (instrs MOVBE16rm)>; -def SPRWriteResGroup150 : SchedWriteRes<[SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup150 : SchedWriteRes<[SPRPort01, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -1809,13 +1809,13 @@ def : InstRW<[SPRWriteResGroup151], (instrs MOVBE64mr, SLDT16m, STRm)>; -def SPRWriteResGroup152 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup152 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup152], (instrs MOVBE64rm)>; -def SPRWriteResGroup153 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup153 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup153], (instregex "^MOVDIR64B(16|32|64)$")>; @@ -1832,7 +1832,7 @@ def SPRWriteResGroup155 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup155], (instrs MOVDIRI64)>; -def SPRWriteResGroup156 : 
SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup156 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1855,7 +1855,7 @@ def SPRWriteResGroup158 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup158], (instrs MOVNTImr)>; -def SPRWriteResGroup159 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup159 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 8; let NumMicroOps = 7; @@ -1876,31 +1876,31 @@ def : InstRW<[SPRWriteResGroup160], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$", "^VPTERNLOG(D|Q)Z(128|256)rri((k|kz)?)$")>; def : InstRW<[SPRWriteResGroup160], (instrs VPBLENDDrri)>; -def SPRWriteResGroup161 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup161 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 7; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup161], (instregex "^MOVS(L|Q|W)$")>; -def SPRWriteResGroup162 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup162 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 6; } def : InstRW<[SPRWriteResGroup162], (instregex "^MOVSX(16|32|64)rm(16|32)$", "^MOVSX(32|64)rm8$")>; def : InstRW<[SPRWriteResGroup162], (instrs MOVSX32rm8_NOREX)>; -def SPRWriteResGroup163 : SchedWriteRes<[SPRPort01_05_10, SPRPort02_03_11]> { +def SPRWriteResGroup163 : SchedWriteRes<[SPRPort01_05_11, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup163], (instrs MOVSX16rm8)>; -def SPRWriteResGroup164 : SchedWriteRes<[SPRPort01_05_10]>; +def SPRWriteResGroup164 : SchedWriteRes<[SPRPort01_05_11]>; def : InstRW<[SPRWriteResGroup164], (instregex "^MOVSX(16|32|64)rr(8|16|32)$")>; def : InstRW<[SPRWriteResGroup164], (instrs 
MOVSX32rr8_NOREX)>; -def SPRWriteResGroup165 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup165 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -1916,7 +1916,7 @@ def : InstRW<[SPRWriteResGroup165, ReadAfterVecYLd], (instregex "^VP(ADD|SUB)(U? "^VPS(L|R)L(V?)WZrmk(z?)$", "^VPSRA(V?)WZrmk(z?)$")>; -def SPRWriteResGroup166 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup166 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 14; let NumMicroOps = 3; } @@ -1945,70 +1945,70 @@ def SPRWriteResGroup168 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort05, SPRPort0 } def : InstRW<[SPRWriteResGroup168], (instrs MWAITrr)>; -def SPRWriteResGroup169 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup169 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[SPRWriteResGroup169], (instrs OUT16ir)>; -def SPRWriteResGroup170 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup170 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 6, 27, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[SPRWriteResGroup170], (instrs OUT16rr)>; -def SPRWriteResGroup171 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup171 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, 
SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[SPRWriteResGroup171], (instrs OUT32ir)>; -def SPRWriteResGroup172 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup172 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 6, 29, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[SPRWriteResGroup172], (instrs OUT32rr)>; -def SPRWriteResGroup173 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup173 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[SPRWriteResGroup173], (instrs OUT8ir)>; -def SPRWriteResGroup174 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup174 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 5, 26, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[SPRWriteResGroup174], (instrs OUT8rr)>; -def SPRWriteResGroup175 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup175 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 6, 25, 16, 
7, 1, 17, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 80; } def : InstRW<[SPRWriteResGroup175], (instrs OUTSB)>; -def SPRWriteResGroup176 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup176 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 6, 28, 16, 10, 1, 20, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 89; } def : InstRW<[SPRWriteResGroup176], (instrs OUTSL)>; -def SPRWriteResGroup177 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup177 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 83; } def : InstRW<[SPRWriteResGroup177], (instrs OUTSW)>; -def SPRWriteResGroup178 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup178 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -2028,7 +2028,7 @@ def : InstRW<[SPRWriteResGroup178, ReadAfterVecXLd], (instregex "^(V?)P(ADD|SUB) "^VPTERNLOG(D|Q)Z128rmi((kz)?)$")>; def : InstRW<[SPRWriteResGroup178, ReadAfterVecXLd], (instrs VPBLENDDrmi)>; -def SPRWriteResGroup179 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup179 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 8; let NumMicroOps = 2; } @@ -2045,7 +2045,7 @@ def SPRWriteResGroup180 : SchedWriteRes<[SPRPort00_06, SPRPort05]> { } def : InstRW<[SPRWriteResGroup180], (instrs PAUSE)>; -def SPRWriteResGroup181 : SchedWriteRes<[SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup181 
: SchedWriteRes<[SPRPort01, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -2059,7 +2059,7 @@ def : InstRW<[SPRWriteResGroup182], (instregex "^(V?)PEXTR(D|Q)mri$", "^VPEXTR(D|Q)Zmri$", "^VPMOVQDZ128mr(k?)$")>; -def SPRWriteResGroup183 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup183 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 1]; let Latency = 9; let NumMicroOps = 4; @@ -2074,48 +2074,48 @@ def SPRWriteResGroup184 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> { def : InstRW<[SPRWriteResGroup184], (instregex "^(V?)PH(ADD|SUB)SWrr$", "^VPH(ADD|SUB)SWYrr$")>; -def SPRWriteResGroup185 : SchedWriteRes<[SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup185 : SchedWriteRes<[SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 12; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup185], (instregex "^POP(16|32|64)rmm$", "^PUSH(16|32)rmm$")>; -def SPRWriteResGroup186 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup186 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [6, 2, 1, 1]; let Latency = 5; let NumMicroOps = 10; } def : InstRW<[SPRWriteResGroup186], (instrs POPF16)>; -def SPRWriteResGroup187 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup187 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 5; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup187], (instrs POPF64)>; -def SPRWriteResGroup188 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup188 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 0; } def : InstRW<[SPRWriteResGroup188], (instregex "^PREFETCHT(0|1|2)$")>; def : InstRW<[SPRWriteResGroup188], (instrs PREFETCHNTA)>; -def SPRWriteResGroup189 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, 
SPRPort06]> { +def SPRWriteResGroup189 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup189], (instregex "^PTWRITE((64)?)m$")>; -def SPRWriteResGroup190 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> { +def SPRWriteResGroup190 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort06]> { let ReleaseAtCycles = [1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup190], (instrs PTWRITE64r)>; -def SPRWriteResGroup191 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> { +def SPRWriteResGroup191 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort06]> { let ReleaseAtCycles = [2, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 4; @@ -2127,7 +2127,7 @@ def SPRWriteResGroup192 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup192], (instregex "^PUSH64r((mr)?)$")>; -def SPRWriteResGroup193 : SchedWriteRes<[SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup193 : SchedWriteRes<[SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup193], (instrs PUSH64rmm)>; @@ -2143,49 +2143,49 @@ def SPRWriteResGroup195 : SchedWriteRes<[SPRPort01, SPRPort04_09, SPRPort07_08]> } def : InstRW<[SPRWriteResGroup195], (instregex "^PUSH(F|G)S64$")>; -def SPRWriteResGroup196 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup196 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 3, 2]; let Latency = 8; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup196], (instregex "^RC(L|R)(16|32|64)rCL$")>; -def SPRWriteResGroup197 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup197 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; 
let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup197, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>; -def SPRWriteResGroup198 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup198 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [1, 5, 2]; let Latency = 20; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup198, WriteRMW], (instrs RCL8mCL)>; -def SPRWriteResGroup199 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup199 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 5, 2]; let Latency = 7; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup199], (instrs RCL8rCL)>; -def SPRWriteResGroup200 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup200 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 4, 3]; let Latency = 20; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup200, WriteRMW], (instrs RCR8mCL)>; -def SPRWriteResGroup201 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup201 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [3, 4, 3]; let Latency = 9; let NumMicroOps = 10; } def : InstRW<[SPRWriteResGroup201], (instrs RCR8rCL)>; -def SPRWriteResGroup202 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort01_05_10, SPRPort05]> { +def SPRWriteResGroup202 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort01_05_11, SPRPort05]> { let ReleaseAtCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 54; @@ -2197,48 +2197,48 @@ def SPRWriteResGroup203 : SchedWriteRes<[SPRPort01]> { } def : InstRW<[SPRWriteResGroup203], (instrs RDPID64)>; -def 
SPRWriteResGroup204 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup204 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup204], (instrs RDPKRUr)>; -def SPRWriteResGroup205 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> { +def SPRWriteResGroup205 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort05]> { let ReleaseAtCycles = [9, 6, 2, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 18; } def : InstRW<[SPRWriteResGroup205], (instrs RDPMC)>; -def SPRWriteResGroup206 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup206 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = 1386; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup206], (instrs RDRAND16r)>; -def SPRWriteResGroup207 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup207 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup207], (instregex "^RDRAND(32|64)r$")>; -def SPRWriteResGroup208 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup208 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let 
Latency = 1381; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup208], (instrs RDSEED16r)>; -def SPRWriteResGroup209 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup209 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup209], (instregex "^RDSEED(32|64)r$")>; -def SPRWriteResGroup210 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> { +def SPRWriteResGroup210 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort05]> { let ReleaseAtCycles = [5, 6, 3, 1]; let Latency = 18; let NumMicroOps = 15; @@ -2252,13 +2252,13 @@ def SPRWriteResGroup211 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_0 } def : InstRW<[SPRWriteResGroup211], (instrs RDTSCP)>; -def SPRWriteResGroup212 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup212 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup212], (instrs RET64)>; -def SPRWriteResGroup213 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup213 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; @@ -2290,7 +2290,7 @@ def : InstRW<[SPRWriteResGroup217, WriteRMW], (instregex "^RO(L|R)8m(1|i)$", "^(RO|SH)L8mCL$", "^(RO|SA|SH)R8mCL$")>; -def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 15; let NumMicroOps = 3; @@ -2329,7 +2329,7 @@ def SPRWriteResGroup221 : SchedWriteRes<[SPRPort00_06]> { def : InstRW<[SPRWriteResGroup221, WriteRMW], (instregex "^S(A|H)R8m(1|i)$", 
"^SHL8m(1|i)$")>; -def SPRWriteResGroup222 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup222 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -2342,7 +2342,7 @@ def SPRWriteResGroup223 : SchedWriteRes<[SPRPort00_06]> { def : InstRW<[SPRWriteResGroup223], (instregex "^S(A|H)RX(32|64)rr$", "^SHLX(32|64)rr$")>; -def SPRWriteResGroup224 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup224 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 7; @@ -2355,14 +2355,14 @@ def SPRWriteResGroup225 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup225], (instrs SFENCE)>; -def SPRWriteResGroup226 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup226 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 2, 2]; let Latency = 21; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup226], (instregex "^S(G|I)DT64m$")>; -def SPRWriteResGroup227 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup227 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 9; let NumMicroOps = 3; } @@ -2374,7 +2374,7 @@ def SPRWriteResGroup228 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup228], (instrs SHA1MSG1rr)>; -def SPRWriteResGroup229 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup229 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 2, 1, 2, 1]; let Latency = 13; let NumMicroOps = 8; @@ -2388,7 +2388,7 @@ def SPRWriteResGroup230 : 
SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup230], (instrs SHA1MSG2rr)>; -def SPRWriteResGroup231 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup231 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 4; } @@ -2400,7 +2400,7 @@ def SPRWriteResGroup232 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup232], (instrs SHA1NEXTErr)>; -def SPRWriteResGroup233 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup233 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 13; let NumMicroOps = 2; } @@ -2432,7 +2432,7 @@ def SPRWriteResGroup234 : SchedWriteRes<[SPRPort05]> { def : InstRW<[SPRWriteResGroup234], (instrs SHA1RNDS4rri, SHA256RNDS2rr)>; -def SPRWriteResGroup235 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup235 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [3, 2, 1, 1, 1]; let Latency = 12; let NumMicroOps = 8; @@ -2457,64 +2457,64 @@ def : InstRW<[SPRWriteResGroup237], (instregex "^VPMOV(D|Q|W|SQ|SW)BZrrk(z?)$", "^VPMOVUS(Q|W)BZrrk(z?)$")>; def : InstRW<[SPRWriteResGroup237], (instrs SHA256MSG2rr)>; -def SPRWriteResGroup238 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup238 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup238], (instrs SHRD16mri8)>; -def SPRWriteResGroup239 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup239 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup239], (instregex "^SLDT(32|64)r$")>; -def 
SPRWriteResGroup240 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort05]> { +def SPRWriteResGroup240 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort05]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup240], (instrs SMSW16r)>; -def SPRWriteResGroup241 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort05]> { +def SPRWriteResGroup241 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort05]> { let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup241], (instregex "^SMSW(32|64)r$")>; -def SPRWriteResGroup242 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup242 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 24; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup242, ReadAfterVecLd], (instregex "^(V?)SQRTSDm_Int$")>; def : InstRW<[SPRWriteResGroup242, ReadAfterVecLd], (instrs VSQRTSDZm_Int)>; -def SPRWriteResGroup243 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup243 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup243], (instrs STD)>; -def SPRWriteResGroup244 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup244 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [1, 4, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup244], (instrs STI)>; -def SPRWriteResGroup245 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup245 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 8; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup245], (instrs STOSB)>; -def SPRWriteResGroup246 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup246 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles 
= [2, 1, 1]; let Latency = 7; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup246], (instregex "^STOS(L|Q|W)$")>; -def SPRWriteResGroup247 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup247 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let Latency = 5; let NumMicroOps = 2; } @@ -2602,7 +2602,7 @@ def : InstRW<[SPRWriteResGroup253], (instregex "^V(ADD|SUB)PHZrr(bk|kz)$", "^VM(AX|IN|UL)PHZrr(bk|kz)$", "^VM(AX|IN|UL)PHZrr(k|bkz)$")>; -def SPRWriteResGroup254 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup254 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -2617,7 +2617,7 @@ def : InstRW<[SPRWriteResGroup254, ReadAfterVecYLd], (instrs VADDSUBPSYrm)>; def : InstRW<[SPRWriteResGroup254, ReadAfterVecXLd], (instregex "^VPSHUFBZ128rmk(z?)$", "^VPUNPCK(H|L)(BW|WD)Z128rmk(z?)$")>; -def SPRWriteResGroup255 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup255 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -2633,7 +2633,7 @@ def SPRWriteResGroup256 : SchedWriteRes<[SPRPort00_05]> { def : InstRW<[SPRWriteResGroup256], (instregex "^V(ADD|SUB)PSZrr(bk|kz)$", "^V(ADD|SUB)PSZrr(k|bkz)$")>; -def SPRWriteResGroup257 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup257 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 2; } @@ -2647,7 +2647,7 @@ def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd], (instrs VGF2P8MULBZrm)>; def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd, ReadAfterVecYLd], (instregex "^VPMADD52(H|L)UQZm((b|k|bk|kz)?)$", "^VPMADD52(H|L)UQZmbkz$")>; -def SPRWriteResGroup258 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup258 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 11; let NumMicroOps = 2; } @@ -2673,7 +2673,7 @@ def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instregex "^VPALIGNRZ128rm 
"^VPCLMULQDQ(Y|Z)rmi$")>; def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instrs VPCLMULQDQZ256rmi)>; -def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 10; let NumMicroOps = 4; @@ -2690,7 +2690,7 @@ def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rrr$", "^VBLENDVP(D|SY)rrr$", "^VPBLENDVB(Y?)rrr$")>; -def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 9; let NumMicroOps = 4; @@ -2698,7 +2698,7 @@ def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>; def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>; -def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -2718,7 +2718,7 @@ def : InstRW<[SPRWriteResGroup262, ReadAfterVecYLd], (instregex "^VINSERT(F|I)12 "^VPTERNLOG(D|Q)Z256rmbik(z?)$", "^VPTERNLOG(D|Q)Z256rmi((kz)?)$")>; -def SPRWriteResGroup263 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup263 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 3; let NumMicroOps = 2; } @@ -2748,7 +2748,7 @@ def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrm "^VCMPS(D|H|S)Zrmi_Int(k?)$", "^VFPCLASSS(D|H|S)Zmik$")>; -def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let 
Latency = 10; let NumMicroOps = 2; } @@ -2806,14 +2806,14 @@ def : InstRW<[SPRWriteResGroup268], (instregex "^VCVT(U?)DQ2PDZrr((k|kz)?)$", "^VCVT(U?)QQ2PSZrr((b|k|bk|kz)?)$", "^VCVT(U?)QQ2PSZrrbkz$")>; -def SPRWriteResGroup269 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup269 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup269], (instregex "^VCVT(U?)DQ2PHZ128rm(b?)$", "^VCVTNEPS2BF16Z128rm(b?)$")>; -def SPRWriteResGroup270 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup270 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 19; let NumMicroOps = 4; } @@ -2832,7 +2832,7 @@ def SPRWriteResGroup272 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup272], (instregex "^VCVT(U?)DQ2PHZ128rrk(z?)$")>; -def SPRWriteResGroup273 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup273 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 17; let NumMicroOps = 4; } @@ -2840,7 +2840,7 @@ def : InstRW<[SPRWriteResGroup273], (instregex "^VCVT(U?)DQ2PHZ256rm(b?)$", "^VCVTNEPS2BF16Z128rm(bk|kz)$", "^VCVTNEPS2BF16Z128rm(k|bkz)$")>; -def SPRWriteResGroup274 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup274 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 21; let NumMicroOps = 4; } @@ -2859,14 +2859,14 @@ def SPRWriteResGroup276 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup276], (instregex "^VCVT(U?)DQ2PHZ256rrk(z?)$")>; -def SPRWriteResGroup277 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup277 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let 
ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup277], (instregex "^VCVT(U?)DQ2PHZrm(b?)$")>; -def SPRWriteResGroup278 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup278 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 21; let NumMicroOps = 4; @@ -2889,14 +2889,14 @@ def SPRWriteResGroup280 : SchedWriteRes<[SPRPort00, SPRPort05]> { def : InstRW<[SPRWriteResGroup280], (instregex "^VCVT(U?)DQ2PHZrr(bk|kz)$", "^VCVT(U?)DQ2PHZrr(k|bkz)$")>; -def SPRWriteResGroup281 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup281 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 15; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup281, ReadAfterVecXLd], (instregex "^VCVTNE2PS2BF16Z128rm(b?)$")>; -def SPRWriteResGroup282 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup282 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 17; let NumMicroOps = 5; @@ -2918,14 +2918,14 @@ def SPRWriteResGroup284 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup284], (instregex "^VCVTNE2PS2BF16Z(128|256)rrk(z?)$")>; -def SPRWriteResGroup285 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup285 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 16; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup285, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(b?)$")>; -def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, 
SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 18; let NumMicroOps = 5; @@ -2933,7 +2933,7 @@ def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_0 def : InstRW<[SPRWriteResGroup286, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(bk|kz)$", "^VCVTNE2PS2BF16Z256rm(k|bkz)$")>; -def SPRWriteResGroup287 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup287 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 16; let NumMicroOps = 5; @@ -2942,7 +2942,7 @@ def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16 "^VDPBF16PSZm((b|k|bk|kz)?)$")>; def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instrs VDPBF16PSZmbkz)>; -def SPRWriteResGroup288 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup288 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 18; let NumMicroOps = 5; @@ -2977,27 +2977,27 @@ def SPRWriteResGroup292 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup292], (instregex "^VCVTNEPS2BF16Z(128|256)rrk(z?)$")>; -def SPRWriteResGroup293 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup293 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 16; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup293], (instregex "^VCVTNEPS2BF16Z256rm(b?)$")>; -def SPRWriteResGroup294 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup294 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup294], (instregex "^VCVTNEPS2BF16Z256rm(bk|kz)$", "^VCVTNEPS2BF16Z256rm(k|bkz)$")>; -def SPRWriteResGroup295 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup295 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup295], (instregex "^VCVTNEPS2BF16Zrm(b?)$")>; -def SPRWriteResGroup296 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup296 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 18; let NumMicroOps = 4; @@ -3019,7 +3019,7 @@ def SPRWriteResGroup298 : SchedWriteRes<[SPRPort00, SPRPort05]> { } def : InstRW<[SPRWriteResGroup298], (instregex "^VCVTNEPS2BF16Zrrk(z?)$")>; -def SPRWriteResGroup299 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup299 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 3; } @@ -3033,7 +3033,7 @@ def : InstRW<[SPRWriteResGroup299], (instregex "^VCVT(T?)PD2DQYrm$", "^VCVT(U?)QQ2PSZ256rm((b|k|bk|kz)?)$", "^VCVT(U?)QQ2PSZ256rmbkz$")>; -def SPRWriteResGroup300 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup300 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 3; } @@ -3044,14 +3044,14 @@ def : InstRW<[SPRWriteResGroup300], (instregex "^VCVT(T?)P(D|H)2(U?)DQZrm(b?)$", "^VCVT(U?)QQ2PSZrm((b|k|bk|kz)?)$", "^VCVT(U?)QQ2PSZrmbkz$")>; -def SPRWriteResGroup301 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup301 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 19; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup301], (instregex "^VCVTPD2PHZ128rm(b?)$")>; -def SPRWriteResGroup302 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup302 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let 
ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 22; let NumMicroOps = 7; @@ -3073,14 +3073,14 @@ def SPRWriteResGroup304 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup304], (instregex "^VCVTPD2PHZ128rrk(z?)$")>; -def SPRWriteResGroup305 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup305 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 21; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup305], (instregex "^VCVTPD2PHZ256rm(b?)$")>; -def SPRWriteResGroup306 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup306 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 24; let NumMicroOps = 6; @@ -3102,14 +3102,14 @@ def SPRWriteResGroup308 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup308], (instregex "^VCVTPD2PHZ256rrk(z?)$")>; -def SPRWriteResGroup309 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup309 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 23; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup309], (instregex "^VCVTP(D2PH|H2PD)Zrm(b?)$")>; -def SPRWriteResGroup310 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup310 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 26; let NumMicroOps = 6; @@ -3132,7 +3132,7 @@ def SPRWriteResGroup312 : SchedWriteRes<[SPRPort00, SPRPort05]> { def : InstRW<[SPRWriteResGroup312], (instregex "^VCVTP(D2PH|H2PD)Zrr(bk|kz)$", "^VCVTP(D2PH|H2PD)Zrr(k|bkz)$")>; -def SPRWriteResGroup313 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup313 : 
SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -3174,14 +3174,14 @@ def : InstRW<[SPRWriteResGroup314], (instregex "^VCVT(T?)PD2(U?)QQZ(128|256)rr(( "^VSCALEFS(D|S)Zrrb_Int((k|kz)?)$")>; def : InstRW<[SPRWriteResGroup314, ReadAfterVecLd], (instregex "^VFIXUPIMMS(D|S)Zrrib((k|kz)?)$")>; -def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 14; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup315], (instregex "^VCVT(T?)PH2(U?)DQZ128rm(b?)$", "^VCVTPS2PHXZ128rm(b?)$")>; -def SPRWriteResGroup316 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup316 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 17; let NumMicroOps = 3; } @@ -3195,7 +3195,7 @@ def SPRWriteResGroup317 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { def : InstRW<[SPRWriteResGroup317], (instregex "^VCVT(T?)PH2(U?)DQZ(128|256)rrk(z?)$", "^VCVTP(H2PS|S2PH)(X?)Z256rrk(z?)$")>; -def SPRWriteResGroup318 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup318 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 3; } @@ -3204,7 +3204,7 @@ def : InstRW<[SPRWriteResGroup318], (instregex "^VCVT(T?)PH2(U?)DQZ256rm(bk|kz)$ "^VCVTP(H2PS|S2PH)XZ256rm(bk|kz)$", "^VCVTP(H2PS|S2PH)XZ256rm(k|bkz)$")>; -def SPRWriteResGroup319 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup319 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 3; } @@ -3231,14 +3231,14 @@ def : InstRW<[SPRWriteResGroup321], (instregex "^VCVT(T?)PH2(U?)DQZrr(bk|kz)$", "^VCVTP(H2PS|S2PH)XZrr(bk|kz)$", "^VCVTP(H2PS|S2PH)XZrr(k|bkz)$")>; -def SPRWriteResGroup322 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def 
SPRWriteResGroup322 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 23; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup322], (instregex "^VCVTPH2PDZ128rm(b?)$")>; -def SPRWriteResGroup323 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup323 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 26; let NumMicroOps = 7; @@ -3260,14 +3260,14 @@ def SPRWriteResGroup325 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup325], (instregex "^VCVTPH2PDZ128rrk(z?)$")>; -def SPRWriteResGroup326 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup326 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 22; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup326], (instregex "^VCVTPH2PDZ256rm(b?)$")>; -def SPRWriteResGroup327 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup327 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 25; let NumMicroOps = 5; @@ -3295,7 +3295,7 @@ def SPRWriteResGroup330 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup330], (instregex "^VCVTP(H2PS|S2PH)(X?)Z128rrk(z?)$")>; -def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 14; let NumMicroOps = 2; } @@ -3308,7 +3308,7 @@ def : InstRW<[SPRWriteResGroup331, ReadAfterVecYLd], (instregex "^VPMADDUBSWZ256 "^VPMULH((U|RS)?)WZ256rmk(z?)$", "^VPMULLWZ256rmk(z?)$")>; -def SPRWriteResGroup332 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup332 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 13; let NumMicroOps = 3; } @@ -3317,7 +3317,7 @@ def : InstRW<[SPRWriteResGroup332], (instregex "^VCVT(T?)PS2(U?)QQZrm((b|k|bk|kz def : InstRW<[SPRWriteResGroup332], (instrs VCVTPH2PSZrm)>; def : InstRW<[SPRWriteResGroup332, ReadAfterVecYLd], (instregex "^VPERMWZrmk(z?)$")>; -def SPRWriteResGroup333 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup333 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 17; let NumMicroOps = 6; @@ -3332,7 +3332,7 @@ def SPRWriteResGroup334 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup334], (instregex "^VCVT(T?)PH2(U?)QQZ(128|256)rr((k|kz)?)$")>; -def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 18; let NumMicroOps = 6; @@ -3340,7 +3340,7 @@ def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 def : InstRW<[SPRWriteResGroup335], (instregex "^VCVT(T?)PH2(U?)QQZ256rm((b|k|bk|kz)?)$", "^VCVT(T?)PH2(U?)QQZ256rmbkz$")>; -def SPRWriteResGroup336 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup336 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 16; let NumMicroOps = 3; } @@ -3348,7 +3348,7 @@ def : InstRW<[SPRWriteResGroup336], (instregex "^VCVTPS2PHXZ128rm(bk|kz)$", "^VCVTPS2PHXZ128rm(k|bkz)$", "^VCVTPS2PHXZ256rm(b?)$")>; -def SPRWriteResGroup337 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup337 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 16; let NumMicroOps = 3; } @@ -3373,13 +3373,13 
@@ def SPRWriteResGroup340 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { def : InstRW<[SPRWriteResGroup340], (instregex "^VCVT(T?)PS2(U?)QQZ128rr((k|kz)?)$", "^VCVT(U?)QQ2PSZ128rr((k|kz)?)$")>; -def SPRWriteResGroup341 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup341 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup341], (instregex "^VCVT(U?)QQ2PHZ128rm(b?)$")>; -def SPRWriteResGroup342 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup342 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 17; let NumMicroOps = 5; } @@ -3399,13 +3399,13 @@ def SPRWriteResGroup344 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 def : InstRW<[SPRWriteResGroup344], (instregex "^VCVT(U?)QQ2PHZ128rrk(z?)$", "^VCVT(U?)QQ2PHZ256rr$")>; -def SPRWriteResGroup345 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup345 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup345], (instregex "^VCVT(U?)QQ2PHZ256rm(b?)$")>; -def SPRWriteResGroup346 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup346 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 20; let NumMicroOps = 5; } @@ -3418,14 +3418,14 @@ def SPRWriteResGroup347 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup347], (instregex "^VCVT(U?)QQ2PHZ256rrk(z?)$")>; -def SPRWriteResGroup348 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup348 : 
SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 1, 2]; let Latency = 18; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup348], (instregex "^VCVT(U?)QQ2PHZrm(b?)$")>; -def SPRWriteResGroup349 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup349 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 1, 2]; let Latency = 20; let NumMicroOps = 5; @@ -3448,14 +3448,14 @@ def SPRWriteResGroup351 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup351], (instregex "^VCVT(U?)QQ2PHZrr(bk|kz)$", "^VCVT(U?)QQ2PHZrr(k|bkz)$")>; -def SPRWriteResGroup352 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup352 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = 18; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup352, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm((_Int)?)$")>; -def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = 21; let NumMicroOps = 7; @@ -3477,14 +3477,14 @@ def SPRWriteResGroup355 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)_Intk(z?)$")>; -def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 18; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup356, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm((_Int)?)$")>; -def SPRWriteResGroup357 : 
SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup357 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 20; let NumMicroOps = 4; @@ -3506,7 +3506,7 @@ def SPRWriteResGroup359 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)_Intk(z?)$")>; -def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_10]> { let Latency = 13; let NumMicroOps = 3; } @@ -3525,14 +3525,14 @@ def SPRWriteResGroup362 : SchedWriteRes<[SPRPort00_01]> { } def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)_Intk(z?)$")>; -def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 14; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup363, ReadAfterVecLd], (instregex "^VCVT(U?)SI((64)?)2SHZrm((_Int)?)$", "^VCVTSS2SHZrm((_Int)?)$")>; -def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 16; let NumMicroOps = 3; } @@ -3572,14 +3572,14 @@ def : InstRW<[SPRWriteResGroup367], (instregex "^VDBPSADBWZ(128|256)rrik(z?)$", "^VPOPCNT(B|W)Z(128|256)rrk(z?)$", "^VPOPCNT(B|W)Zrrk(z?)$")>; -def SPRWriteResGroup368 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup368 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 36; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup368, ReadAfterVecXLd], (instregex "^VDIVPHZ128rm(b?)$")>; -def SPRWriteResGroup369 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup369 : 
SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 38; let NumMicroOps = 4; @@ -3603,14 +3603,14 @@ def : InstRW<[SPRWriteResGroup371], (instregex "^VDIVPHZ(128|256)rrk$", "^VSQRTPHZ(128|256)r$")>; def : InstRW<[SPRWriteResGroup371], (instrs VDIVPHZ128rrkz)>; -def SPRWriteResGroup372 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup372 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 37; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup372, ReadAfterVecYLd], (instregex "^VDIVPHZ256rm(b?)$")>; -def SPRWriteResGroup373 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup373 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 39; let NumMicroOps = 4; @@ -3626,14 +3626,14 @@ def SPRWriteResGroup374 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup374], (instrs VDIVPHZ256rrkz)>; -def SPRWriteResGroup375 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup375 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [4, 2, 1, 1, 1]; let Latency = 49; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup375, ReadAfterVecYLd], (instregex "^VDIVPHZrm(b?)$")>; -def SPRWriteResGroup376 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup376 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [4, 2, 1, 1, 1]; let Latency = 51; let NumMicroOps = 9; @@ -3663,7 +3663,7 @@ def SPRWriteResGroup379 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup379], (instrs VDIVPSZrr)>; -def SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def 
SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 21; let NumMicroOps = 2; } @@ -3676,7 +3676,7 @@ def SPRWriteResGroup381 : SchedWriteRes<[SPRPort00]> { def : InstRW<[SPRWriteResGroup381], (instrs VDIVSHZrr_Int, VSQRTSHZr_Int)>; -def SPRWriteResGroup382 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup382 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 15; let NumMicroOps = 5; @@ -3691,7 +3691,7 @@ def SPRWriteResGroup383 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup383], (instregex "^VDPBF16PSZ(128|256)r((k|kz)?)$")>; -def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 16; let NumMicroOps = 5; @@ -3699,35 +3699,35 @@ def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort0 def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instregex "^VDPBF16PSZ256m((b|k|bk|kz)?)$")>; def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instrs VDPBF16PSZ256mbkz)>; -def SPRWriteResGroup385 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup385 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [6, 7, 18]; let Latency = 81; let NumMicroOps = 31; } def : InstRW<[SPRWriteResGroup385], (instrs VERRm)>; -def SPRWriteResGroup386 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup386 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [6, 7, 17]; let Latency = 74; let NumMicroOps = 30; } def : InstRW<[SPRWriteResGroup386], (instrs VERRr)>; -def SPRWriteResGroup387 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup387 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles 
= [5, 8, 21]; let Latency = 81; let NumMicroOps = 34; } def : InstRW<[SPRWriteResGroup387], (instrs VERWm)>; -def SPRWriteResGroup388 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup388 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [5, 8, 20]; let Latency = 74; let NumMicroOps = 33; } def : InstRW<[SPRWriteResGroup388], (instrs VERWr)>; -def SPRWriteResGroup389 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup389 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 10; let NumMicroOps = 3; @@ -3736,7 +3736,7 @@ def : InstRW<[SPRWriteResGroup389, ReadAfterVecYLd], (instregex "^VEXPANDP(D|S)Z "^VPEXPAND(B|D|Q|W)Z128rm$", "^VPEXPAND(D|Q)Z128rmk(z?)$")>; -def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 16; let NumMicroOps = 3; @@ -3755,7 +3755,7 @@ def : InstRW<[SPRWriteResGroup390, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZ2 "^VSCALEFPHZ256rm(b?)$")>; def : InstRW<[SPRWriteResGroup390, ReadAfterVecLd], (instrs VSCALEFSHZrm)>; -def SPRWriteResGroup391 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup391 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 21; let NumMicroOps = 3; @@ -3797,7 +3797,7 @@ def : InstRW<[SPRWriteResGroup393], (instregex "^VF(C?)MADDCPHZ(128|256)rk(z?)$" "^VF(C?)MULCSHZrr(bk|kz)$", "^VF(C?)MULCSHZrr(k|bkz)$")>; -def SPRWriteResGroup394 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup394 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 16; let NumMicroOps = 3; @@ -3808,7 +3808,7 @@ def : InstRW<[SPRWriteResGroup394, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZr "^VRNDSCALEP(D|S)Zrm(b?)ik(z?)$", "^VSCALEFPHZrm(b?)$")>; -def SPRWriteResGroup395 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup395 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 21; let NumMicroOps = 3; @@ -3838,7 +3838,7 @@ def : InstRW<[SPRWriteResGroup397], (instregex "^VF(C?)MADDCPHZr(bk|kz)$", "^VF(C?)MULCPHZrr(bk|kz)$", "^VF(C?)MULCPHZrr(k|bkz)$")>; -def SPRWriteResGroup398 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup398 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 29; let NumMicroOps = 8; @@ -3848,7 +3848,7 @@ def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSYrm, VPGATHERQDYrm)>; -def SPRWriteResGroup399 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup399 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 20; let NumMicroOps = 4; @@ -3858,7 +3858,7 @@ def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm)>; -def SPRWriteResGroup400 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup400 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 4]; let Latency = 28; let NumMicroOps = 7; @@ -3868,7 +3868,7 @@ def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm)>; -def SPRWriteResGroup401 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup401 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 8, 2]; let Latency = 28; let NumMicroOps = 
11; @@ -3878,7 +3878,7 @@ def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZrm, VPGATHERQDZrm)>; -def SPRWriteResGroup402 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup402 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 1, 2]; let Latency = 20; let NumMicroOps = 5; @@ -3888,7 +3888,7 @@ def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSrm, VPGATHERQDrm)>; -def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 8]; let Latency = 30; let NumMicroOps = 12; @@ -3896,7 +3896,7 @@ def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_0 def : InstRW<[SPRWriteResGroup403, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; -def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 4]; let Latency = 27; let NumMicroOps = 7; @@ -3904,7 +3904,7 @@ def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_1 def : InstRW<[SPRWriteResGroup404, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZ128rm, VPGATHERDDZ128rm)>; -def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 8]; let Latency = 29; let NumMicroOps = 11; @@ -3912,7 +3912,7 @@ def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, 
SPRPort02_03_1 def : InstRW<[SPRWriteResGroup405, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm)>; -def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 16, 2]; let Latency = 30; let NumMicroOps = 19; @@ -3920,7 +3920,7 @@ def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> def : InstRW<[SPRWriteResGroup406, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; -def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 28; let NumMicroOps = 8; @@ -3928,7 +3928,7 @@ def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_0 def : InstRW<[SPRWriteResGroup407, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSrm, VPGATHERDDrm)>; -def SPRWriteResGroup408 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup408 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 15; let NumMicroOps = 2; } @@ -3949,7 +3949,7 @@ def SPRWriteResGroup410 : SchedWriteRes<[SPRPort00_01]> { def : InstRW<[SPRWriteResGroup410], (instregex "^VGF2P8AFFINE((INV)?)QBZ(128|256)rrikz$", "^VGF2P8MULBZ(128|256)rrkz$")>; -def SPRWriteResGroup411 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup411 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 15; let NumMicroOps = 2; } @@ -3975,20 +3975,20 @@ def SPRWriteResGroup414 : SchedWriteRes<[SPRPort01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup414], (instregex "^VH(ADD|SUB)P(D|S)rr$")>; -def SPRWriteResGroup415 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup415 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_10]> { let 
Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup415], (instrs VLDMXCSR)>; -def SPRWriteResGroup416 : SchedWriteRes<[SPRPort01, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> { +def SPRWriteResGroup416 : SchedWriteRes<[SPRPort01, SPRPort01_05, SPRPort02_03, SPRPort02_03_10, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> { let ReleaseAtCycles = [1, 1, 1, 8, 1, 1, 2, 3]; let Latency = 40; let NumMicroOps = 18; } def : InstRW<[SPRWriteResGroup416], (instrs VMCLEARm)>; -def SPRWriteResGroup417 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup417 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -4008,7 +4008,7 @@ def : InstRW<[SPRWriteResGroup418], (instregex "^VMOVDQU(8|16)Z(128|256)rrk(z?)( "^VPBLENDM(B|W)Z(128|256)rrk(z?)$", "^VPMOVM2(B|W)Z(128|256)rk$")>; -def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 2]; let Latency = 12; let NumMicroOps = 5; @@ -4082,7 +4082,7 @@ def SPRWriteResGroup430 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup430], (instrs VMOVNTPSZmr)>; -def SPRWriteResGroup431 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup431 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [3, 1, 8]; let Latency = 10; let NumMicroOps = 12; @@ -4098,7 +4098,7 @@ def SPRWriteResGroup432 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup432], (instrs VP2INTERSECTDZ128rr, VP2INTERSECTQZ256rr)>; -def SPRWriteResGroup433 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup433 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, 
SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 8, 7, 2, 1, 11]; let Latency = 27; let NumMicroOps = 30; @@ -4112,7 +4112,7 @@ def SPRWriteResGroup434 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_0 } def : InstRW<[SPRWriteResGroup434], (instrs VP2INTERSECTDZ256rr)>; -def SPRWriteResGroup435 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup435 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [13, 9, 1, 23]; let Latency = 40; let NumMicroOps = 46; @@ -4126,7 +4126,7 @@ def SPRWriteResGroup436 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup436], (instrs VP2INTERSECTDZrr)>; -def SPRWriteResGroup437 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup437 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 4]; let Latency = 6; let NumMicroOps = 5; @@ -4140,7 +4140,7 @@ def SPRWriteResGroup438 : SchedWriteRes<[SPRPort05]> { } def : InstRW<[SPRWriteResGroup438], (instrs VP2INTERSECTQZ128rr)>; -def SPRWriteResGroup439 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup439 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [8, 7, 1, 14]; let Latency = 29; let NumMicroOps = 30; @@ -4169,7 +4169,7 @@ def : InstRW<[SPRWriteResGroup441], (instregex "^VP(A|SU)BS(B|W)Z(128|256)rrk(z? 
"^VPSRAWZ(128|256)rik(z?)$", "^VPSUBUS(B|W)Z(128|256)rrk(z?)$")>; -def SPRWriteResGroup442 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup442 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -4206,21 +4206,21 @@ def SPRWriteResGroup445 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort04_09, def : InstRW<[SPRWriteResGroup445], (instregex "^VPCOMPRESS(B|W)Z(128|256)mrk$")>; def : InstRW<[SPRWriteResGroup445], (instrs VPCOMPRESSWZmrk)>; -def SPRWriteResGroup446 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup446 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 2, 2]; let Latency = 12; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup446], (instrs VPCOMPRESSBZmr)>; -def SPRWriteResGroup447 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup447 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 2, 2]; let Latency = 14; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup447], (instrs VPCOMPRESSBZmrk)>; -def SPRWriteResGroup448 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup448 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [5, 4, 1, 5]; let Latency = 17; let NumMicroOps = 15; @@ -4235,7 +4235,7 @@ def SPRWriteResGroup449 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup449], (instregex "^VPCONFLICTDZ128rr((k|kz)?)$")>; -def SPRWriteResGroup450 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup450 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 5, 
1, 1, 9]; let Latency = 24; let NumMicroOps = 23; @@ -4250,7 +4250,7 @@ def SPRWriteResGroup451 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup451], (instregex "^VPCONFLICTDZ256rr((k|kz)?)$")>; -def SPRWriteResGroup452 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup452 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [11, 8, 1, 17]; let Latency = 33; let NumMicroOps = 37; @@ -4272,7 +4272,7 @@ def SPRWriteResGroup454 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup454], (instrs VPCONFLICTDZrrk)>; -def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 11; let NumMicroOps = 4; @@ -4288,7 +4288,7 @@ def SPRWriteResGroup456 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup456], (instregex "^VPCONFLICTQZ128rr((k|kz)?)$")>; -def SPRWriteResGroup457 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup457 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [5, 4, 1, 5]; let Latency = 20; let NumMicroOps = 15; @@ -4303,7 +4303,7 @@ def SPRWriteResGroup458 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup458], (instregex "^VPCONFLICTQZ256rr((k|kz)?)$")>; -def SPRWriteResGroup459 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup459 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 5, 1, 9]; let Latency = 23; let NumMicroOps = 22; @@ -4325,7 +4325,7 @@ def SPRWriteResGroup461 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup461], (instrs 
VPCONFLICTQZrrk)>; -def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 13; let NumMicroOps = 4; @@ -4348,14 +4348,14 @@ def SPRWriteResGroup464 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2BZ(128|256)rrk(z?)$", "^VPERM(I|T)2WZ(128|256)rr$")>; -def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 12; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ256rm$")>; -def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 14; let NumMicroOps = 4; @@ -4364,14 +4364,14 @@ def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ2 def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2WZ128rm, VPERMT2WZ256rm)>; -def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 12; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZrm$")>; -def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 14; let NumMicroOps = 4; @@ -4394,7 +4394,7 @@ def SPRWriteResGroup470 : SchedWriteRes<[SPRPort00_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup470], (instregex 
"^VPERM(I|T)2BZrrk(z?)$", "^VPERM(I|T)2WZrr$")>; -def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; @@ -4409,7 +4409,7 @@ def SPRWriteResGroup472 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2WZ(128|256)rrk(z?)$")>; -def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 15; let NumMicroOps = 4; @@ -4417,21 +4417,21 @@ def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPo def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2WZ128rmk(z?)$")>; def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2WZ256rm)>; -def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2WZ256rmk(z?)$")>; -def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 15; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2WZrm)>; -def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; @@ -4445,20 +4445,20 @@ def SPRWriteResGroup477 : 
SchedWriteRes<[SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2WZrrk(z?)$")>; -def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2WZrmk(z?)$")>; -def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 10; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup479, ReadAfterVecYLd], (instrs VPERMWZ128rm)>; -def SPRWriteResGroup480 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup480 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 13; let NumMicroOps = 3; } @@ -4470,13 +4470,13 @@ def SPRWriteResGroup481 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup481], (instregex "^VPERMWZ(128|256)rr$")>; -def SPRWriteResGroup482 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup482 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 11; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup482, ReadAfterVecYLd], (instrs VPERMWZ256rm)>; -def SPRWriteResGroup483 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup483 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 11; let NumMicroOps = 3; } @@ -4490,7 +4490,7 @@ def SPRWriteResGroup484 : SchedWriteRes<[SPRPort05]> { def : InstRW<[SPRWriteResGroup484], (instregex "^VPEXPAND(B|W)Z(128|256)rrk(z?)$", "^VPEXPAND(B|W)Zrrk(z?)$")>; -def SPRWriteResGroup485 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup485 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10]> 
{ let ReleaseAtCycles = [1, 2, 1]; let Latency = 10; let NumMicroOps = 4; @@ -4504,7 +4504,7 @@ def : InstRW<[SPRWriteResGroup486], (instregex "^VPMADDUBSWZ(128|256)rrk(z?)$", "^VPMULH((U|RS)?)WZ(128|256)rrk(z?)$", "^VPMULLWZ(128|256)rrk(z?)$")>; -def SPRWriteResGroup487 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup487 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 14; let NumMicroOps = 2; } @@ -4595,7 +4595,7 @@ def SPRWriteResGroup496 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> } def : InstRW<[SPRWriteResGroup496], (instregex "^VPMOVQDZ((256)?)mrk$")>; -def SPRWriteResGroup497 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup497 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 23; let NumMicroOps = 4; @@ -4612,7 +4612,7 @@ def SPRWriteResGroup498 : SchedWriteRes<[SPRPort00_01]> { } def : InstRW<[SPRWriteResGroup498], (instregex "^VPMULLQZ(128|256)rr((k|kz)?)$")>; -def SPRWriteResGroup499 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup499 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 23; let NumMicroOps = 4; @@ -4627,7 +4627,7 @@ def SPRWriteResGroup500 : SchedWriteRes<[SPRPort00]> { } def : InstRW<[SPRWriteResGroup500], (instregex "^VPMULLQZrr((k|kz)?)$")>; -def SPRWriteResGroup501 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup501 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 1, 4, 4]; let Latency = 12; let NumMicroOps = 11; @@ -4639,7 +4639,7 @@ def : InstRW<[SPRWriteResGroup501], (instrs VPSCATTERDDZ128mr, VSCATTERDPSZ128mr, VSCATTERQPSZ256mr)>; -def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, 
SPRPort00_01, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 1, 8, 8]; let Latency = 12; let NumMicroOps = 19; @@ -4647,7 +4647,7 @@ def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_0 def : InstRW<[SPRWriteResGroup502], (instrs VPSCATTERDDZ256mr, VSCATTERDPSZ256mr)>; -def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 16, 16]; let Latency = 19; let NumMicroOps = 35; @@ -4655,7 +4655,7 @@ def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPo def : InstRW<[SPRWriteResGroup503], (instrs VPSCATTERDDZmr, VSCATTERDPSZmr)>; -def SPRWriteResGroup504 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup504 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 1, 2, 2]; let Latency = 12; let NumMicroOps = 7; @@ -4665,7 +4665,7 @@ def : InstRW<[SPRWriteResGroup504], (instregex "^VPSCATTER(D|Q)QZ128mr$", def : InstRW<[SPRWriteResGroup504], (instrs VPSCATTERQDZ128mr, VSCATTERQPSZ128mr)>; -def SPRWriteResGroup505 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup505 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 8, 8]; let Latency = 12; let NumMicroOps = 19; @@ -4675,7 +4675,7 @@ def : InstRW<[SPRWriteResGroup505], (instregex "^VPSCATTER(D|Q)QZmr$", def : InstRW<[SPRWriteResGroup505], (instrs VPSCATTERQDZmr, VSCATTERQPSZmr)>; -def SPRWriteResGroup506 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup506 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -4685,7 +4685,7 @@ def : 
InstRW<[SPRWriteResGroup506, ReadAfterVecXLd], (instregex "^VPSH(L|R)D(D|Q "^VPSH(L|R)DV(D|Q)Z128m(b|k|kz)$", "^VPSH(L|R)DV(D|Q)Z128mbk(z?)$")>; -def SPRWriteResGroup507 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup507 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 3; } @@ -4702,7 +4702,7 @@ def SPRWriteResGroup509 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup509], (instregex "^VPSH(L|R)D(D|Q)Z(128|256)rrik(z?)$")>; -def SPRWriteResGroup510 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup510 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -4712,13 +4712,13 @@ def : InstRW<[SPRWriteResGroup510, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q "^VPSH(L|R)DV(D|Q)Z256m(b|k|kz)$", "^VPSH(L|R)DV(D|Q)Z256mbk(z?)$")>; -def SPRWriteResGroup511 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup511 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 10; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup511, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q)Z256rm(b?)ik(z?)$")>; -def SPRWriteResGroup512 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup512 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -4728,7 +4728,7 @@ def : InstRW<[SPRWriteResGroup512, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q "^VPSH(L|R)DV(D|Q)Zm(b|k|kz)$", "^VPSH(L|R)DV(D|Q)Zmbk(z?)$")>; -def SPRWriteResGroup513 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup513 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let Latency = 10; let NumMicroOps = 3; } @@ -4740,7 +4740,7 @@ def SPRWriteResGroup514 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup514], (instregex "^VPSH(L|R)D(D|Q)Zrrik(z?)$")>; -def 
SPRWriteResGroup515 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup515 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 3; } @@ -4752,13 +4752,13 @@ def SPRWriteResGroup516 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup516], (instregex "^VPSH(L|R)DWZ(128|256)rrik(z?)$")>; -def SPRWriteResGroup517 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup517 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup517, ReadAfterVecYLd], (instregex "^VPSH(L|R)DWZ256rmik(z?)$")>; -def SPRWriteResGroup518 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup518 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 3; } @@ -4770,14 +4770,14 @@ def SPRWriteResGroup519 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup519], (instregex "^VPSH(L|R)DWZrrik(z?)$")>; -def SPRWriteResGroup520 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup520 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 6; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup520, ReadAfterVecXLd], (instrs VPSHUFBITQMBZ128rm)>; def : InstRW<[SPRWriteResGroup520, ReadAfterVecYLd], (instregex "^VPSHUFBITQMBZ((256)?)rm$")>; -def SPRWriteResGroup521 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup521 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 8; let NumMicroOps = 3; } @@ -4791,7 +4791,7 @@ def SPRWriteResGroup522 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> { def : InstRW<[SPRWriteResGroup522], (instregex "^VPS(L|R)LWZ128rrk(z?)$", "^VPSRAWZ128rrk(z?)$")>; -def SPRWriteResGroup523 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def 
SPRWriteResGroup523 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 16; let NumMicroOps = 4; @@ -4806,7 +4806,7 @@ def SPRWriteResGroup524 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup524], (instregex "^VRCPPHZrk(z?)$")>; -def SPRWriteResGroup525 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup525 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 20; let NumMicroOps = 4; @@ -4815,7 +4815,7 @@ def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instregex "^VREDUCEPHZ128r def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instrs VREDUCESHZrmi)>; def : InstRW<[SPRWriteResGroup525, ReadAfterVecYLd], (instregex "^VREDUCEPHZ256rm(b?)i$")>; -def SPRWriteResGroup526 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup526 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 22; let NumMicroOps = 4; @@ -4841,14 +4841,14 @@ def : InstRW<[SPRWriteResGroup528], (instregex "^VREDUCEPHZ(128|256)rrik(z?)$", "^VREDUCESHZrri(bk|kz)$", "^VREDUCESHZrri(k|bkz)$")>; -def SPRWriteResGroup529 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup529 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 20; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup529, ReadAfterVecYLd], (instregex "^VREDUCEPHZrm(b?)i$")>; -def SPRWriteResGroup530 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup530 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 22; let NumMicroOps = 4; @@ -4878,7 +4878,7 @@ def SPRWriteResGroup533 : SchedWriteRes<[SPRPort00]> { def : InstRW<[SPRWriteResGroup533], (instregex "^VRNDSCALEP(D|S)Zrri((b|k|bk|kz)?)$", "^VRNDSCALEP(D|S)Zrribkz$")>; -def SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def 
SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 17; let NumMicroOps = 3; @@ -4903,7 +4903,7 @@ def : InstRW<[SPRWriteResGroup535], (instregex "^VRNDSCALEPHZ(128|256)rrik(z?)$" "^VSCALEFSHZrrb_Intk(z?)$", "^VSCALEFSHZrrk(z?)$")>; -def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 17; let NumMicroOps = 3; @@ -4931,14 +4931,14 @@ def : InstRW<[SPRWriteResGroup538], (instregex "^VRSQRT14P(D|S)Zr$")>; def : InstRW<[SPRWriteResGroup538], (instrs VRSQRT14PSZrk, VRSQRTPHZr)>; -def SPRWriteResGroup539 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup539 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 25; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup539], (instrs VSQRTPDYm)>; def : InstRW<[SPRWriteResGroup539, ReadAfterVecYLd], (instregex "^VSQRTPDZ256m(b?)$")>; -def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 20; let NumMicroOps = 2; } @@ -4946,14 +4946,14 @@ def : InstRW<[SPRWriteResGroup540, ReadAfterVecXLd], (instregex "^VSQRTPDZ128m(b "^VSQRTPDZ128m(k|bkz)$")>; def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZm_Intk(z?)$")>; -def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 38; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup541, ReadAfterVecYLd], (instrs VSQRTPDZm)>; -def SPRWriteResGroup542 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup542 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 39; let NumMicroOps = 4; @@ -4967,7 
+4967,7 @@ def SPRWriteResGroup543 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup543], (instrs VSQRTPDZr)>; -def SPRWriteResGroup544 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup544 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 41; let NumMicroOps = 4; @@ -4990,14 +4990,14 @@ def SPRWriteResGroup546 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup546], (instrs VSQRTPHZ128rkz)>; -def SPRWriteResGroup547 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup547 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 40; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup547, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(b?)$")>; -def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 42; let NumMicroOps = 4; @@ -5005,14 +5005,14 @@ def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_0 def : InstRW<[SPRWriteResGroup548, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(bk|kz)$", "^VSQRTPHZ256m(k|bkz)$")>; -def SPRWriteResGroup549 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup549 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [4, 2, 1, 1, 1]; let Latency = 53; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup549, ReadAfterVecYLd], (instregex "^VSQRTPHZm(b?)$")>; -def SPRWriteResGroup550 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup550 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles 
= [4, 2, 1, 1, 1]; let Latency = 55; let NumMicroOps = 9; @@ -5042,7 +5042,7 @@ def SPRWriteResGroup553 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup553], (instrs VSQRTPSZr)>; -def SPRWriteResGroup554 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10]> { +def SPRWriteResGroup554 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11]> { let ReleaseAtCycles = [1, 2, 3, 3, 1]; let Latency = 12; let NumMicroOps = 10; @@ -5063,42 +5063,42 @@ def SPRWriteResGroup556 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, } def : InstRW<[SPRWriteResGroup556], (instrs WRMSR)>; -def SPRWriteResGroup557 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> { +def SPRWriteResGroup557 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort05]> { let ReleaseAtCycles = [2, 1, 4, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup557], (instrs WRPKRUr)>; -def SPRWriteResGroup558 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup558 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 12; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup558, WriteRMW], (instregex "^XADD(16|32|64)rm$")>; -def SPRWriteResGroup559 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup559 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 13; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup559, WriteRMW], (instrs XADD8rm)>; -def SPRWriteResGroup560 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup560 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 39; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup560, WriteRMW], (instregex "^XCHG(16|32)rm$")>; -def SPRWriteResGroup561 : SchedWriteRes<[SPRPort00_01_05_06_10, 
SPRPort00_06]> { +def SPRWriteResGroup561 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [5, 1]; let Latency = 39; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup561, WriteRMW], (instrs XCHG64rm)>; -def SPRWriteResGroup562 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup562 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 40; let NumMicroOps = 5; @@ -5112,21 +5112,21 @@ def SPRWriteResGroup563 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort0 } def : InstRW<[SPRWriteResGroup563], (instrs XCH_F)>; -def SPRWriteResGroup564 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup564 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [7, 3, 8, 5]; let Latency = 4; let NumMicroOps = 23; } def : InstRW<[SPRWriteResGroup564], (instrs XGETBV)>; -def SPRWriteResGroup565 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup565 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup565], (instrs XLAT)>; -def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [1, 21, 1, 8]; let Latency = 37; let NumMicroOps = 31; @@ -5134,70 +5134,70 @@ def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_1 def : InstRW<[SPRWriteResGroup566], (instregex "^XRSTOR((S|64)?)$")>; def : InstRW<[SPRWriteResGroup566], (instrs XRSTORS64)>; -def SPRWriteResGroup567 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup567 : 
SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 42; let NumMicroOps = 140; } def : InstRW<[SPRWriteResGroup567], (instrs XSAVE)>; -def SPRWriteResGroup568 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup568 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 41; let NumMicroOps = 140; } def : InstRW<[SPRWriteResGroup568], (instrs XSAVE64)>; -def SPRWriteResGroup569 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup569 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 151; } def : InstRW<[SPRWriteResGroup569], (instrs XSAVEC)>; -def SPRWriteResGroup570 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup570 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 152; } def : InstRW<[SPRWriteResGroup570], (instrs XSAVEC64)>; -def SPRWriteResGroup571 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup571 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, 
SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [25, 35, 52, 27, 4, 1, 10, 1]; let Latency = 42; let NumMicroOps = 155; } def : InstRW<[SPRWriteResGroup571], (instrs XSAVEOPT)>; -def SPRWriteResGroup572 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup572 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [25, 35, 53, 27, 4, 1, 10, 1]; let Latency = 42; let NumMicroOps = 156; } def : InstRW<[SPRWriteResGroup572], (instrs XSAVEOPT64)>; -def SPRWriteResGroup573 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup573 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2]; let Latency = 42; let NumMicroOps = 184; } def : InstRW<[SPRWriteResGroup573], (instrs XSAVES)>; -def SPRWriteResGroup574 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup574 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2]; let Latency = 42; let NumMicroOps = 186; } def : InstRW<[SPRWriteResGroup574], (instrs XSAVES64)>; -def SPRWriteResGroup575 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> { +def SPRWriteResGroup575 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort05]> { let ReleaseAtCycles = [4, 23, 2, 14, 
8, 1, 2]; let Latency = 5; let NumMicroOps = 54; } def : InstRW<[SPRWriteResGroup575], (instrs XSETBV)>; -def SPRWriteResGroup576 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup576 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [2, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td index 9625306d716b5..f2d0f4b1a0d28 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -1658,7 +1658,7 @@ def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> { let NumMicroOps = 1; } def : InstRW<[Zn4MOVSZ], (instregex - "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z?)(rr|rrk|rrkz)" + "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)Z(rr|rrk|rrkz)" )>; def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> { @@ -1667,7 +1667,7 @@ def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> { let NumMicroOps = 1; } def : InstRW<[Zn4MOVSrr], (instregex - "(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)(Z?)(rr|rrk|rrkz)" + "(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)Z(rr|rrk|rrkz)" )>; diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 5450222a7b2e1..5865eb7b70b60 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -845,7 +845,7 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue SizeTmp = DAG.getNode(ISD::ADD, DL, VT, Size, DAG.getConstant(31, DL, MVT::i32)); SDValue SizeRoundUp = DAG.getNode(ISD::AND, DL, VT, SizeTmp, - DAG.getConstant(~31, DL, MVT::i32)); + DAG.getSignedConstant(~31, DL, MVT::i32)); unsigned SPReg = Xtensa::SP; SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, VT); @@ -873,7 +873,7 @@ SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op, // Lo = 0 // Hi = Lo << (Shamt - register size) - SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + 
SDValue MinusRegisterSize = DAG.getSignedConstant(-32, DL, VT); SDValue ShamtMinusRegisterSize = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); @@ -914,7 +914,7 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, // Hi = 0; unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; - SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + SDValue MinusRegisterSize = DAG.getSignedConstant(-32, DL, VT); SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); SDValue ShamtMinusRegisterSize = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index e0c857b60c409..51d6b7cb9b1fd 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2011,7 +2011,7 @@ const StringMap sys::getHostCPUFeatures() { const StringMap sys::getHostCPUFeatures() { unsigned long hwcap = getauxval(AT_HWCAP); bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU - const uint32_t cpucfg2 = 0x2, cpucfg3 = 0x3; + uint32_t cpucfg2 = 0x2, cpucfg3 = 0x3; __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); __asm__("cpucfg %[cpucfg3], %[cpucfg3]\n\t" : [cpucfg3] "+r"(cpucfg3)); diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index b236e26f495df..a2782d00b3520 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -374,10 +374,12 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["dot9-insts"] = true; Features["dot10-insts"] = true; Features["dot11-insts"] = true; + Features["dot12-insts"] = true; Features["dl-insts"] = true; Features["atomic-ds-pk-add-16-insts"] = true; Features["atomic-flat-pk-add-16-insts"] = true; Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; Features["atomic-global-pk-add-bf16-inst"] = true; Features["16-bit-insts"] = true; Features["dpp"] = true; 
@@ -406,6 +408,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["dot8-insts"] = true; Features["dot9-insts"] = true; Features["dot10-insts"] = true; + Features["dot12-insts"] = true; Features["dl-insts"] = true; Features["16-bit-insts"] = true; Features["dpp"] = true; @@ -470,7 +473,14 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["gws"] = true; break; case GK_GFX950: + Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; Features["prng-inst"] = true; + Features["permlane16-swap"] = true; + Features["permlane32-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["dot12-insts"] = true; + Features["dot13-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; Features["gfx950-insts"] = true; [[fallthrough]]; case GK_GFX942: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 46ce011c5f788..6fe9693581853 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2245,9 +2245,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { const Instruction *UI = dyn_cast(U); if (!UI) return false; - return match(UI, - m_Select(m_Value(), m_Specific(Op1), m_Specific(&I))) || - match(UI, m_Select(m_Value(), m_Specific(&I), m_Specific(Op1))); + return match(UI, m_c_Select(m_Specific(Op1), m_Specific(&I))); })) { if (Value *NegOp1 = Negator::Negate(IsNegation, /* IsNSW */ IsNegation && I.hasNoSignedWrap(), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index e2eae7fb8327c..b4033fc2a418a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1185,14 +1185,27 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) { /// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq 
X01, Y01 /// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01 /// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer. -Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, - bool IsAnd) { +Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) { if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse()) return nullptr; CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE; - auto GetMatchPart = [&](ICmpInst *Cmp, + auto GetMatchPart = [&](Value *CmpV, unsigned OpNo) -> std::optional { + assert(CmpV->getType()->isIntOrIntVectorTy(1) && "Must be bool"); + + Value *X, *Y; + // icmp ne (and x, 1), (and y, 1) <=> trunc (xor x, y) to i1 + // icmp eq (and x, 1), (and y, 1) <=> not (trunc (xor x, y) to i1) + if (Pred == CmpInst::ICMP_NE + ? match(CmpV, m_Trunc(m_Xor(m_Value(X), m_Value(Y)))) + : match(CmpV, m_Not(m_Trunc(m_Xor(m_Value(X), m_Value(Y)))))) + return {{OpNo == 0 ? X : Y, 0, 1}}; + + auto *Cmp = dyn_cast(CmpV); + if (!Cmp) + return std::nullopt; + if (Pred == Cmp->getPredicate()) return matchIntPart(Cmp->getOperand(OpNo)); @@ -1465,11 +1478,15 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, // FCmp canonicalization ensures that (fcmp ord/uno X, X) and // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0). 
- if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP())) + if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP())) { // Ignore the constants because they are obviously not NANs: // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(LHS->getFastMathFlags() & + RHS->getFastMathFlags()); return Builder.CreateFCmp(PredL, LHS0, RHS0); + } } if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { @@ -2728,47 +2745,31 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { foldBooleanAndOr(Op0, Op1, I, /*IsAnd=*/true, /*IsLogical=*/false)) return replaceInstUsesWith(I, Res); - { - ICmpInst *LHS = dyn_cast(Op0); - ICmpInst *RHS = dyn_cast(Op1); - - // TODO: Base this on foldBooleanAndOr instead? - // TODO: Make this recursive; it's a little tricky because an arbitrary - // number of 'and' instructions might have to be created. - if (LHS && match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op1); - // LHS & (X && Y) --> (LHS && X) && Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalAnd(Res, Y) - : Builder.CreateAnd(Res, Y)); - // LHS & (X && Y) --> X && (LHS & Y) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalAnd(X, Res) - : Builder.CreateAnd(X, Res)); - } - if (RHS && match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op0); - // (X && Y) & RHS --> (X && RHS) && Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? 
Builder.CreateLogicalAnd(Res, Y) - : Builder.CreateAnd(Res, Y)); - // (X && Y) & RHS --> X && (Y & RHS) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalAnd(X, Res) - : Builder.CreateAnd(X, Res)); - } + // TODO: Make this recursive; it's a little tricky because an arbitrary + // number of 'and' instructions might have to be created. + if (match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op1); + // Op0 & (X && Y) --> (Op0 && X) && Y + if (Value *Res = foldBooleanAndOr(Op0, X, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // Op0 & (X && Y) --> X && (Op0 & Y) + if (Value *Res = foldBooleanAndOr(Op0, Y, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); + } + if (match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op0); + // (X && Y) & Op1 --> (X && Op1) && Y + if (Value *Res = foldBooleanAndOr(X, Op1, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // (X && Y) & Op1 --> X && (Y & Op1) + if (Value *Res = foldBooleanAndOr(Y, Op1, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? 
Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); } if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) @@ -3416,9 +3417,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, return X; } - if (Value *X = foldEqOfParts(LHS, RHS, IsAnd)) - return X; - // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs. @@ -3541,6 +3539,9 @@ Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS, if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical)) return Res; + if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd)) + return Res; + return nullptr; } @@ -3829,48 +3830,31 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { foldBooleanAndOr(Op0, Op1, I, /*IsAnd=*/false, /*IsLogical=*/false)) return replaceInstUsesWith(I, Res); - { - ICmpInst *LHS = dyn_cast(Op0); - ICmpInst *RHS = dyn_cast(Op1); - - // TODO: Base this on foldBooleanAndOr instead? - // TODO: Make this recursive; it's a little tricky because an arbitrary - // number of 'or' instructions might have to be created. - Value *X, *Y; - if (LHS && match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op1); - // LHS | (X || Y) --> (LHS || X) || Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalOr(Res, Y) - : Builder.CreateOr(Res, Y)); - // LHS | (X || Y) --> X || (LHS | Y) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? 
Builder.CreateLogicalOr(X, Res) - : Builder.CreateOr(X, Res)); - } - if (RHS && match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op0); - // (X || Y) | RHS --> (X || RHS) || Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalOr(Res, Y) - : Builder.CreateOr(Res, Y)); - // (X || Y) | RHS --> X || (Y | RHS) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalOr(X, Res) - : Builder.CreateOr(X, Res)); - } + // TODO: Make this recursive; it's a little tricky because an arbitrary + // number of 'or' instructions might have to be created. + if (match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op1); + // Op0 | (X || Y) --> (Op0 || X) || Y + if (Value *Res = foldBooleanAndOr(Op0, X, I, /* IsAnd */ false, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalOr(Res, Y) + : Builder.CreateOr(Res, Y)); + // Op0 | (X || Y) --> X || (Op0 | Y) + if (Value *Res = foldBooleanAndOr(Op0, Y, I, /* IsAnd */ false, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalOr(X, Res) + : Builder.CreateOr(X, Res)); + } + if (match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op0); + // (X || Y) | Op1 --> (X || Op1) || Y + if (Value *Res = foldBooleanAndOr(X, Op1, I, /* IsAnd */ false, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalOr(Res, Y) + : Builder.CreateOr(Res, Y)); + // (X || Y) | Op1 --> X || (Y | Op1) + if (Value *Res = foldBooleanAndOr(Y, Op1, I, /* IsAnd */ false, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? 
Builder.CreateLogicalOr(X, Res) + : Builder.CreateOr(X, Res)); } if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 42c0acd1e45ec..fd38738e3be80 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1736,9 +1736,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Value *X; if (match(IIOperand, m_Neg(m_Value(X)))) return replaceOperand(*II, 0, X); - if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X))))) - return replaceOperand(*II, 0, X); - if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) + if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X)))) return replaceOperand(*II, 0, X); Value *Y; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 6c2554ea73b7f..7221c987b9821 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -786,15 +786,6 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { } } - // Test if the trunc is the user of a select which is part of a - // minimum or maximum operation. If so, don't do any more simplification. - // Even simplifying demanded bits can break the canonical form of a - // min/max. - Value *LHS, *RHS; - if (SelectInst *Sel = dyn_cast(Src)) - if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN) - return nullptr; - // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(Trunc)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 857724470f222..fed21db393ed2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -8460,9 +8460,7 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) { case Instruction::Select: // fcmp eq (cond ? x : -x), 0 --> fcmp eq x, 0 if (FCmpInst::isEquality(Pred) && match(RHSC, m_AnyZeroFP()) && - (match(LHSI, - m_Select(m_Value(), m_Value(X), m_FNeg(m_Deferred(X)))) || - match(LHSI, m_Select(m_Value(), m_FNeg(m_Value(X)), m_Deferred(X))))) + match(LHSI, m_c_Select(m_FNeg(m_Value(X)), m_Deferred(X)))) return replaceOperand(I, 0, X); if (Instruction *NV = FoldOpIntoSelect(I, cast(LHSI))) return NV; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 9588930d7658c..0508ed48fc19c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -412,7 +412,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final bool IsAnd, bool IsLogical = false); Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor); - Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd); + Value *foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd); Value *foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, ICmpInst *ICmp2, bool IsAnd); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 139e75dd3ddb3..22acf59c78a38 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -158,8 +158,8 @@ static cl::opt static cl::opt ClGatedCallbacks( "sanitizer-coverage-gated-trace-callbacks", - cl::desc("Gate the invocation of the 
tracing callbacks on a global " - "variable. Currently only supported for trace-pc-guard."), + cl::desc("Gate the invocation of the tracing callbacks on a global variable" + ". Currently only supported for trace-pc-guard and trace-cmp."), cl::Hidden, cl::init(false)); namespace { @@ -234,7 +234,8 @@ class ModuleSanitizerCoverage { void instrumentFunction(Function &F); void InjectCoverageForIndirectCalls(Function &F, ArrayRef IndirCalls); - void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets); + void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets, + Value *&FunctionGateCmp); void InjectTraceForDiv(Function &F, ArrayRef DivTraceTargets); void InjectTraceForGep(Function &F, @@ -242,9 +243,10 @@ class ModuleSanitizerCoverage { void InjectTraceForLoadsAndStores(Function &F, ArrayRef Loads, ArrayRef Stores); void InjectTraceForSwitch(Function &F, - ArrayRef SwitchTraceTargets); + ArrayRef SwitchTraceTargets, + Value *&FunctionGateCmp); bool InjectCoverage(Function &F, ArrayRef AllBlocks, - bool IsLeafFunc = true); + Value *&FunctionGateCmp, bool IsLeafFunc); GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, Function &F, Type *Ty, const char *Section); @@ -254,7 +256,7 @@ class ModuleSanitizerCoverage { Instruction *I); Value *CreateFunctionLocalGateCmp(IRBuilder<> &IRB); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, - Value *&FunctionGateCmp, bool IsLeafFunc = true); + Value *&FunctionGateCmp, bool IsLeafFunc); Function *CreateInitCallsForSections(Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty, const char *Section); @@ -494,9 +496,9 @@ bool ModuleSanitizerCoverage::instrumentModule() { SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy)); if (Options.GatedCallbacks) { - if (!Options.TracePCGuard) { + if (!Options.TracePCGuard && !Options.TraceCmp) { C->emitError(StringRef("'") + ClGatedCallbacks.ArgStr + - "' is only supported with trace-pc-guard"); + "' is only 
supported with trace-pc-guard or trace-cmp"); return true; } @@ -725,10 +727,11 @@ void ModuleSanitizerCoverage::instrumentFunction(Function &F) { if (Options.CollectControlFlow) createFunctionControlFlow(F); - InjectCoverage(F, BlocksToInstrument, IsLeafFunc); + Value *FunctionGateCmp = nullptr; + InjectCoverage(F, BlocksToInstrument, FunctionGateCmp, IsLeafFunc); InjectCoverageForIndirectCalls(F, IndirCalls); - InjectTraceForCmp(F, CmpTraceTargets); - InjectTraceForSwitch(F, SwitchTraceTargets); + InjectTraceForCmp(F, CmpTraceTargets, FunctionGateCmp); + InjectTraceForSwitch(F, SwitchTraceTargets, FunctionGateCmp); InjectTraceForDiv(F, DivTraceTargets); InjectTraceForGep(F, GepTraceTargets); InjectTraceForLoadsAndStores(F, Loads, Stores); @@ -837,10 +840,10 @@ Instruction *ModuleSanitizerCoverage::CreateGateBranch(Function &F, bool ModuleSanitizerCoverage::InjectCoverage(Function &F, ArrayRef AllBlocks, + Value *&FunctionGateCmp, bool IsLeafFunc) { if (AllBlocks.empty()) return false; CreateFunctionLocalArrays(F, AllBlocks); - Value *FunctionGateCmp = nullptr; for (size_t i = 0, N = AllBlocks.size(); i < N; i++) InjectCoverageAtBlock(F, *AllBlocks[i], i, FunctionGateCmp, IsLeafFunc); return true; @@ -874,7 +877,8 @@ void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( // {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... 
}) void ModuleSanitizerCoverage::InjectTraceForSwitch( - Function &, ArrayRef SwitchTraceTargets) { + Function &F, ArrayRef SwitchTraceTargets, + Value *&FunctionGateCmp) { for (auto *I : SwitchTraceTargets) { if (SwitchInst *SI = dyn_cast(I)) { InstrumentationIRBuilder IRB(I); @@ -905,7 +909,13 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch( *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage, ConstantArray::get(ArrayOfInt64Ty, Initializers), "__sancov_gen_cov_switch_values"); - IRB.CreateCall(SanCovTraceSwitchFunction, {Cond, GV}); + if (Options.GatedCallbacks) { + auto GateBranch = CreateGateBranch(F, FunctionGateCmp, I); + IRBuilder<> GateIRB(GateBranch); + GateIRB.CreateCall(SanCovTraceSwitchFunction, {Cond, GV}); + } else { + IRB.CreateCall(SanCovTraceSwitchFunction, {Cond, GV}); + } } } } @@ -969,7 +979,8 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores( } void ModuleSanitizerCoverage::InjectTraceForCmp( - Function &, ArrayRef CmpTraceTargets) { + Function &F, ArrayRef CmpTraceTargets, + Value *&FunctionGateCmp) { for (auto *I : CmpTraceTargets) { if (ICmpInst *ICMP = dyn_cast(I)) { InstrumentationIRBuilder IRB(ICMP); @@ -997,8 +1008,15 @@ void ModuleSanitizerCoverage::InjectTraceForCmp( } auto Ty = Type::getIntNTy(*C, TypeSize); - IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true), - IRB.CreateIntCast(A1, Ty, true)}); + if (Options.GatedCallbacks) { + auto GateBranch = CreateGateBranch(F, FunctionGateCmp, I); + IRBuilder<> GateIRB(GateBranch); + GateIRB.CreateCall(CallbackFunc, {GateIRB.CreateIntCast(A0, Ty, true), + GateIRB.CreateIntCast(A1, Ty, true)}); + } else { + IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true), + IRB.CreateIntCast(A1, Ty, true)}); + } } } } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 1991ec82d1e1e..c7e814bced57d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ 
-1662,21 +1662,43 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1, /// \endcode /// /// So we need to turn hoisted load/store into cload/cstore. +/// +/// \param BI The branch instruction. +/// \param SpeculatedConditionalLoadsStores The load/store instructions that +/// will be speculated. +/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG. static void hoistConditionalLoadsStores( BranchInst *BI, SmallVectorImpl &SpeculatedConditionalLoadsStores, - bool Invert) { + std::optional Invert) { auto &Context = BI->getParent()->getContext(); auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1); auto *Cond = BI->getOperand(0); // Construct the condition if needed. BasicBlock *BB = BI->getParent(); - IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back()); - Value *Mask = Builder.CreateBitCast( - Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond, - VCondTy); + IRBuilder<> Builder( + Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI); + Value *Mask = nullptr; + Value *MaskFalse = nullptr; + Value *MaskTrue = nullptr; + if (Invert.has_value()) { + Mask = Builder.CreateBitCast( + *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond, + VCondTy); + } else { + MaskFalse = Builder.CreateBitCast( + Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy); + MaskTrue = Builder.CreateBitCast(Cond, VCondTy); + } + auto PeekThroughBitcasts = [](Value *V) { + while (auto *BitCast = dyn_cast(V)) + V = BitCast->getOperand(0); + return V; + }; for (auto *I : SpeculatedConditionalLoadsStores) { - IRBuilder<> Builder(I); + IRBuilder<> Builder(Invert.has_value() ? I : BI); + if (!Invert.has_value()) + Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse; // We currently assume conditional faulting load/store is supported for // scalar types only when creating new instructions. This can be easily // extended for vector types in the future. 
@@ -1688,12 +1710,14 @@ static void hoistConditionalLoadsStores( auto *Ty = I->getType(); PHINode *PN = nullptr; Value *PassThru = nullptr; - for (User *U : I->users()) - if ((PN = dyn_cast(U))) { - PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB), - FixedVectorType::get(Ty, 1)); - break; - } + if (Invert.has_value()) + for (User *U : I->users()) + if ((PN = dyn_cast(U))) { + PassThru = Builder.CreateBitCast( + PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)), + FixedVectorType::get(Ty, 1)); + break; + } MaskedLoadStore = Builder.CreateMaskedLoad( FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru); Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty); @@ -1702,8 +1726,8 @@ static void hoistConditionalLoadsStores( I->replaceAllUsesWith(NewLoadStore); } else { // Handle Store. - auto *StoredVal = - Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1)); + auto *StoredVal = Builder.CreateBitCast( + PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1)); MaskedLoadStore = Builder.CreateMaskedStore( StoredVal, I->getOperand(1), cast(I)->getAlign(), Mask); } @@ -3155,7 +3179,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, return HaveRewritablePHIs; } -static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, +static bool isProfitableToSpeculate(const BranchInst *BI, + std::optional Invert, const TargetTransformInfo &TTI) { // If the branch is non-unpredictable, and is predicted to *not* branch to // the `then` block, then avoid speculating it. @@ -3166,7 +3191,10 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0) return true; - uint64_t EndWeight = Invert ? TWeight : FWeight; + if (!Invert.has_value()) + return false; + + uint64_t EndWeight = *Invert ? 
TWeight : FWeight; BranchProbability BIEndProb = BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); BranchProbability Likely = TTI.getPredictableBranchThreshold(); @@ -3814,10 +3842,7 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // These can often be turned into switches and other things. auto IsBinOpOrAnd = [](Value *V) { return match( - V, m_CombineOr( - m_BinOp(), - m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()), - m_Select(m_Value(), m_Value(), m_ImmConstant())))); + V, m_CombineOr(m_BinOp(), m_c_Select(m_ImmConstant(), m_Value()))); }; if (PN->getType()->isIntegerTy(1) && (IsBinOpOrAnd(PN->getIncomingValue(0)) || @@ -8034,6 +8059,35 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (HoistCommon && hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts)) return requestResimplify(); + + if (BI && HoistLoadsStoresWithCondFaulting && + Options.HoistLoadsStoresWithCondFaulting && + isProfitableToSpeculate(BI, std::nullopt, TTI)) { + SmallVector SpeculatedConditionalLoadsStores; + auto CanSpeculateConditionalLoadsStores = [&]() { + for (auto *Succ : successors(BB)) { + for (Instruction &I : *Succ) { + if (I.isTerminator()) { + if (I.getNumSuccessors() > 1) + return false; + continue; + } else if (!isSafeCheapLoadStore(&I, TTI) || + SpeculatedConditionalLoadsStores.size() == + HoistLoadsStoresWithCondFaultingThreshold) { + return false; + } + SpeculatedConditionalLoadsStores.push_back(&I); + } + } + return !SpeculatedConditionalLoadsStores.empty(); + }; + + if (CanSpeculateConditionalLoadsStores()) { + hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, + std::nullopt); + return requestResimplify(); + } + } } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index a6b5235235ff3..fbcf181a45a66 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -234,9 +234,9 @@ class VPBuilder { VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, - VPValue *Step) { + VPValue *Step, const Twine &Name = "") { return tryInsertInstruction( - new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step)); + new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name)); } VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d13770a35c108..d68a26251ac9d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9603,7 +9603,7 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { Value *DerivedIV = emitTransformedIndex( State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); - DerivedIV->setName("offset.idx"); + DerivedIV->setName(Name); assert(DerivedIV != CanonicalIV && "IV didn't need transforming?"); State.set(this, DerivedIV, VPLane(0)); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d033b7c2ef4a9..115cbd4d2ce5e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1143,9 +1143,7 @@ static void addMask(SmallVectorImpl &Mask, ArrayRef SubMask, assert( (!ExtendingManyInputs || SubMask.size() > Mask.size() || // Check if input scalars were extended to match the size of other node. 
- (SubMask.size() == Mask.size() && - std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(), - [](int Idx) { return Idx == PoisonMaskElem; }))) && + (SubMask.size() == Mask.size() && Mask.back() == PoisonMaskElem)) && "SubMask with many inputs support must be larger than the mask."); if (Mask.empty()) { Mask.append(SubMask.begin(), SubMask.end()); @@ -10877,9 +10875,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { CommonMask[Idx] = Idx; // Add subvectors permutation cost. if (!SubVectorsMask.empty()) { - assert(SubVectorsMask.size() == CommonMask.size() && + assert(SubVectorsMask.size() <= CommonMask.size() && "Expected same size of masks for subvectors and common mask."); - SmallVector SVMask(SubVectorsMask.begin(), SubVectorsMask.end()); + SmallVector SVMask(CommonMask.size(), PoisonMaskElem); + copy(SubVectorsMask, SVMask.begin()); for (auto [I1, I2] : zip(SVMask, CommonMask)) { if (I2 != PoisonMaskElem) { assert(I1 == PoisonMaskElem && "Expected unused subvectors mask"); @@ -12035,7 +12034,14 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { if (VectorizableTree.back()->isGather() && VectorizableTree.back()->isAltShuffle() && VectorizableTree.back()->getVectorFactor() > 2 && - allSameBlock(VectorizableTree.back()->Scalars)) + allSameBlock(VectorizableTree.back()->Scalars) && + !VectorizableTree.back()->Scalars.front()->getType()->isVectorTy() && + TTI->getScalarizationOverhead( + getWidenedType(VectorizableTree.back()->Scalars.front()->getType(), + VectorizableTree.back()->getVectorFactor()), + APInt::getAllOnes(VectorizableTree.back()->getVectorFactor()), + /*Insert=*/true, /*Extract=*/false, + TTI::TCK_RecipThroughput) > -SLPCostThreshold) return false; // Otherwise, we can't vectorize the tree. 
It is both tiny and not fully @@ -14374,7 +14380,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { if (SubVectorsMask.empty()) { Vec = CreateSubVectors(Vec, CommonMask); } else { - SmallVector SVMask(SubVectorsMask.begin(), SubVectorsMask.end()); + SmallVector SVMask(CommonMask.size(), PoisonMaskElem); + copy(SubVectorsMask, SVMask.begin()); for (auto [I1, I2] : zip(SVMask, CommonMask)) { if (I2 != PoisonMaskElem) { assert(I1 == PoisonMaskElem && "Expected unused subvectors mask"); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a24a86b4201c3..529108a5aaa97 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -478,32 +478,23 @@ void VPIRBasicBlock::execute(VPTransformState *State) { void VPBasicBlock::execute(VPTransformState *State) { bool Replica = bool(State->Lane); - VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; - VPBlockBase *SingleHPred = nullptr; BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. - auto IsLoopRegion = [](VPBlockBase *BB) { - auto *R = dyn_cast(BB); - return R && !R->isReplicator(); + auto IsReplicateRegion = [](VPBlockBase *BB) { + auto *R = dyn_cast_or_null(BB); + return R && R->isReplicator(); }; // 1. Create an IR basic block. - if (PrevVPBB && /* A */ - !((SingleHPred = getSingleHierarchicalPredecessor()) && - SingleHPred->getExitingBasicBlock() == PrevVPBB && - PrevVPBB->getSingleHierarchicalSuccessor() && - (SingleHPred->getParent() == getEnclosingLoopRegion() && - !IsLoopRegion(SingleHPred))) && /* B */ - !(Replica && getPredecessors().empty())) { /* C */ - // The last IR basic block is reused, as an optimization, in three cases: - // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; - // B. 
when the current VPBB has a single (hierarchical) predecessor which - // is PrevVPBB and the latter has a single (hierarchical) successor which - // both are in the same non-replicator region; and - // C. when the current VPBB is an entry of a region replica - where PrevVPBB - // is the exiting VPBB of this region from a previous instance, or the - // predecessor of this region. - + if (this == getPlan()->getVectorPreheader() || + (Replica && this == getParent()->getEntry()) || + IsReplicateRegion(getSingleHierarchicalPredecessor())) { + // Reuse the previous basic block if the current VPBB is either + // * the vector preheader, + // * the entry to a replicate region, or + // * the exit of a replicate region. + State->CFG.VPBB2IRBB[this] = NewBB; + } else { NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); @@ -518,8 +509,6 @@ void VPBasicBlock::execute(VPTransformState *State) { State->CFG.PrevBB = NewBB; State->CFG.VPBB2IRBB[this] = NewBB; connectToPredecessors(State->CFG); - } else { - State->CFG.VPBB2IRBB[this] = NewBB; } // 2. Fill the IR basic block with IR instructions. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 70221e7af7dbb..1b1630ebc6c23 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -643,7 +643,7 @@ class VPBlockBase { virtual void dropAllReferences(VPValue *NewValue) = 0; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void printAsOperand(raw_ostream &OS, bool PrintType) const { + void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); } @@ -1412,7 +1412,7 @@ class VPIRInstruction : public VPRecipeBase { InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override; - Instruction &getInstruction() { return I; } + Instruction &getInstruction() const { return I; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
@@ -3301,19 +3301,23 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { /// for floating point inductions. const FPMathOperator *FPBinOp; + /// Name to use for the generated IR instruction for the derived IV. + std::string Name; + public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) + VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, + const Twine &Name = "") : VPDerivedIVRecipe( IndDesc.getKind(), dyn_cast_or_null(IndDesc.getInductionBinOp()), - Start, CanonicalIV, Step) {} + Start, CanonicalIV, Step, Name) {} VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, - VPValue *Step) + VPValue *Step, const Twine &Name = "") : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind), - FPBinOp(FPBinOp) {} + FPBinOp(FPBinOp), Name(Name.str()) {} ~VPDerivedIVRecipe() override = default; @@ -3821,12 +3825,17 @@ class VPlan { VPBasicBlock *getEntry() { return Entry; } const VPBasicBlock *getEntry() const { return Entry; } - /// Return the VPIRBasicBlock wrapping the header of the scalar loop. - VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; } + /// Returns the preheader of the vector loop region. + VPBasicBlock *getVectorPreheader() { + return cast(getVectorLoopRegion()->getSinglePredecessor()); + } - /// Return the VPBasicBlock for the preheader of the scalar loop. - VPBasicBlock *getScalarPreheader() const { - return cast(ScalarHeader->getSinglePredecessor()); + /// Returns the VPRegionBlock of the vector loop. 
+ VPRegionBlock *getVectorLoopRegion() { + return cast(getEntry()->getSingleSuccessor()); + } + const VPRegionBlock *getVectorLoopRegion() const { + return cast(getEntry()->getSingleSuccessor()); } /// Returns the 'middle' block of the plan, that is the block that selects @@ -3839,6 +3848,14 @@ class VPlan { return cast(getVectorLoopRegion()->getSingleSuccessor()); } + /// Return the VPBasicBlock for the preheader of the scalar loop. + VPBasicBlock *getScalarPreheader() const { + return cast(ScalarHeader->getSinglePredecessor()); + } + + /// Return the VPIRBasicBlock wrapping the header of the scalar loop. + VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; } + /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks /// of the VPlan, that is leaf nodes except the scalar header. Defined in /// VPlanHCFG, as the definition of the type needs access to the definitions @@ -3949,19 +3966,6 @@ class VPlan { LLVM_DUMP_METHOD void dump() const; #endif - /// Returns the VPRegionBlock of the vector loop. - VPRegionBlock *getVectorLoopRegion() { - return cast(getEntry()->getSingleSuccessor()); - } - const VPRegionBlock *getVectorLoopRegion() const { - return cast(getEntry()->getSingleSuccessor()); - } - - /// Returns the preheader of the vector loop region. - VPBasicBlock *getVectorPreheader() { - return cast(getVectorLoopRegion()->getSinglePredecessor()); - } - /// Returns the canonical induction recipe of the vector loop. 
VPCanonicalIVPHIRecipe *getCanonicalIV() { VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 8b8ab6be99b0d..cb42cfe8159b0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -93,34 +93,19 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) { unsigned Opcode = R->getOpcode(); - switch (Opcode) { - case Instruction::ICmp: - case Instruction::FCmp: - return IntegerType::get(Ctx, 1); - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { + if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || + Instruction::isBitwiseLogicOp(Opcode)) { Type *ResTy = inferScalarType(R->getOperand(0)); assert(ResTy == inferScalarType(R->getOperand(1)) && "types for both operands must match for binary op"); CachedTypes[R->getOperand(1)] = ResTy; return ResTy; } + + switch (Opcode) { + case Instruction::ICmp: + case Instruction::FCmp: + return IntegerType::get(Ctx, 1); case Instruction::FNeg: case Instruction::Freeze: return inferScalarType(R->getOperand(0)); @@ -157,36 +142,26 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) { } Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { - switch (R->getUnderlyingInstr()->getOpcode()) { - case Instruction::Call: { - unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 
2 : 1); - return cast(R->getOperand(CallIdx)->getLiveInIRValue()) - ->getReturnType(); - } - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { + unsigned Opcode = R->getUnderlyingInstr()->getOpcode(); + + if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || + Instruction::isBitwiseLogicOp(Opcode)) { Type *ResTy = inferScalarType(R->getOperand(0)); assert(ResTy == inferScalarType(R->getOperand(1)) && "inferred types for operands of binary op don't match"); CachedTypes[R->getOperand(1)] = ResTy; return ResTy; } + + if (Instruction::isCast(Opcode)) + return R->getUnderlyingInstr()->getType(); + + switch (Opcode) { + case Instruction::Call: { + unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 
2 : 1); + return cast(R->getOperand(CallIdx)->getLiveInIRValue()) + ->getReturnType(); + } case Instruction::Select: { Type *ResTy = inferScalarType(R->getOperand(1)); assert(ResTy == inferScalarType(R->getOperand(2)) && @@ -197,21 +172,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { case Instruction::ICmp: case Instruction::FCmp: return IntegerType::get(Ctx, 1); - case Instruction::AddrSpaceCast: case Instruction::Alloca: - case Instruction::BitCast: - case Instruction::Trunc: - case Instruction::SExt: - case Instruction::ZExt: - case Instruction::FPExt: - case Instruction::FPTrunc: case Instruction::ExtractValue: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::FPToSI: - case Instruction::FPToUI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: return R->getUnderlyingInstr()->getType(); case Instruction::Freeze: case Instruction::FNeg: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 24cf4666c62ce..b2ee31c3e240a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -859,7 +859,9 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, if (getNumOperands() != 0) { assert(getNumOperands() == 1 && "can have at most 1 operand"); O << " (extra operand: "; - printOperands(O, SlotTracker); + getOperand(0)->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[0]->printAsOperand(O); O << ")"; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ad609da210fd1..1d1029710c709 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -528,7 +528,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); VPSingleDefRecipe 
*BaseIV = CanonicalIV; if (!CanonicalIV->isCanonical(Kind, StartV, Step)) { - BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step); + BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step, + "offset.idx"); } // Truncate base induction if needed. diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 89f74540669e4..71c7d547ac7d9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -134,52 +134,43 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { } return true; }; - for (const VPUser *U : EVL.users()) { - if (!TypeSwitch(U) - .Case( - [&](const VPWidenIntrinsicRecipe *S) { - return VerifyEVLUse(*S, S->getNumOperands() - 1); - }) - .Case([&](const VPWidenStoreEVLRecipe *S) { - return VerifyEVLUse(*S, 2); - }) - .Case( - [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) - .Case([&](const VPWidenEVLRecipe *W) { - return VerifyEVLUse( - *W, Instruction::isUnaryOp(W->getOpcode()) ? 
1 : 2); - }) - .Case([&](const VPReductionEVLRecipe *R) { - return VerifyEVLUse(*R, 2); - }) - .Case( - [&](const VPScalarCastRecipe *S) { return true; }) - .Case([&](const VPInstruction *I) { - if (I->getOpcode() != Instruction::Add) { - errs() - << "EVL is used as an operand in non-VPInstruction::Add\n"; - return false; - } - if (I->getNumUsers() != 1) { - errs() << "EVL is used in VPInstruction:Add with multiple " - "users\n"; - return false; - } - if (!isa(*I->users().begin())) { - errs() << "Result of VPInstruction::Add with EVL operand is " - "not used by VPEVLBasedIVPHIRecipe\n"; - return false; - } - return true; - }) - .Default([&](const VPUser *U) { - errs() << "EVL has unexpected user\n"; - return false; - })) { - return false; - } - } - return true; + return all_of(EVL.users(), [&VerifyEVLUse](VPUser *U) { + return TypeSwitch(U) + .Case([&](const VPWidenIntrinsicRecipe *S) { + return VerifyEVLUse(*S, S->getNumOperands() - 1); + }) + .Case( + [&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); }) + .Case( + [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) + .Case([&](const VPWidenEVLRecipe *W) { + return VerifyEVLUse(*W, + Instruction::isUnaryOp(W->getOpcode()) ? 
1 : 2); + }) + .Case( + [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) + .Case([&](const VPInstruction *I) { + if (I->getOpcode() != Instruction::Add) { + errs() << "EVL is used as an operand in non-VPInstruction::Add\n"; + return false; + } + if (I->getNumUsers() != 1) { + errs() << "EVL is used in VPInstruction:Add with multiple " + "users\n"; + return false; + } + if (!isa(*I->users().begin())) { + errs() << "Result of VPInstruction::Add with EVL operand is " + "not used by VPEVLBasedIVPHIRecipe\n"; + return false; + } + return true; + }) + .Default([&](const VPUser *U) { + errs() << "EVL has unexpected user\n"; + return false; + }); + }); } bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { diff --git a/llvm/test/Analysis/CostModel/RISCV/abs.ll b/llvm/test/Analysis/CostModel/RISCV/abs.ll index 8f0dd7b0aefe9..7252716af8605 100644 --- a/llvm/test/Analysis/CostModel/RISCV/abs.ll +++ b/llvm/test/Analysis/CostModel/RISCV/abs.ll @@ -44,37 +44,37 @@ declare @llvm.abs.nxv64i8(, i1) define i32 @abs(i32 %arg) { ; CHECK-LABEL: 'abs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call @llvm.abs.nxv2i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call @llvm.abs.nxv4i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call @llvm.abs.nxv8i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %3 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = call @llvm.abs.nxv2i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = call @llvm.abs.nxv4i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call @llvm.abs.nxv8i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call @llvm.abs.nxv2i32( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call @llvm.abs.nxv4i32( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call @llvm.abs.nxv8i32( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call @llvm.abs.nxv16i32( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call @llvm.abs.nxv4i32( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call @llvm.abs.nxv8i32( 
undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = call @llvm.abs.nxv16i32( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv2i16( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.abs.nxv4i16( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call @llvm.abs.nxv8i16( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.abs.nxv16i16( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv32i16( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = call @llvm.abs.nxv8i16( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %23 = call @llvm.abs.nxv16i16( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 16 for instruction: %24 = call @llvm.abs.nxv32i16( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %27 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.abs.nxv8i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv16i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call @llvm.abs.nxv32i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call @llvm.abs.nxv64i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %30 = call @llvm.abs.nxv16i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %31 = call @llvm.abs.nxv32i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %32 = call @llvm.abs.nxv64i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) diff --git a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll index 
ea05464b08408..55db70ce1e912 100644 --- a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll +++ b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll @@ -157,6 +157,260 @@ define void @bitreverse() { ret void } +define void @ctlz() { +; NOZVBB-LABEL: 'ctlz' +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %5 = call @llvm.ctlz.nxv1i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %6 = call @llvm.ctlz.nxv2i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %7 = call @llvm.ctlz.nxv4i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %8 = call @llvm.ctlz.nxv8i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %9 = call @llvm.ctlz.nxv16i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %10 = call @llvm.ctlz.nxv32i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %11 = call @llvm.ctlz.nxv64i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 
for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %16 = call @llvm.ctlz.nxv1i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %17 = call @llvm.ctlz.nxv2i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %18 = call @llvm.ctlz.nxv4i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %19 = call @llvm.ctlz.nxv8i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %20 = call @llvm.ctlz.nxv16i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %21 = call @llvm.ctlz.nxv32i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %26 = call @llvm.ctlz.nxv1i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %27 = call @llvm.ctlz.nxv2i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %28 = call @llvm.ctlz.nxv4i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %29 = call 
@llvm.ctlz.nxv8i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %30 = call @llvm.ctlz.nxv16i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %35 = call @llvm.ctlz.nxv1i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %36 = call @llvm.ctlz.nxv2i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %37 = call @llvm.ctlz.nxv4i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %38 = call @llvm.ctlz.nxv8i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %39 = call @llvm.ctlz.nxv16i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVBB-LABEL: 'ctlz' +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false) +; ZVBB-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.ctlz.nxv1i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.ctlz.nxv2i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.ctlz.nxv4i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.ctlz.nxv8i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.ctlz.nxv16i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.ctlz.nxv32i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.ctlz.nxv64i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.ctlz.nxv1i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.ctlz.nxv2i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.ctlz.nxv4i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.ctlz.nxv8i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.ctlz.nxv16i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %21 = call @llvm.ctlz.nxv32i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.ctlz.nxv1i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.ctlz.nxv2i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.ctlz.nxv4i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.ctlz.nxv8i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.ctlz.nxv16i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.ctlz.nxv1i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = 
call @llvm.ctlz.nxv2i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.ctlz.nxv4i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.ctlz.nxv8i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.ctlz.nxv16i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) + call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false) + call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false) + call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false) + call @llvm.ctlz.nxv1i8( undef, i1 false) + call @llvm.ctlz.nxv2i8( undef, i1 false) + call @llvm.ctlz.nxv4i8( undef, i1 false) + call @llvm.ctlz.nxv8i8( undef, i1 false) + call @llvm.ctlz.nxv16i8( undef, i1 false) + call @llvm.ctlz.nxv32i8( undef, i1 false) + call @llvm.ctlz.nxv64i8( undef, i1 false) + call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) + call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) + call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false) + call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) + call @llvm.ctlz.nxv1i16( undef, i1 false) + call @llvm.ctlz.nxv2i16( undef, i1 false) + call @llvm.ctlz.nxv4i16( undef, i1 false) + call @llvm.ctlz.nxv8i16( undef, i1 false) + call @llvm.ctlz.nxv16i16( undef, i1 false) + call @llvm.ctlz.nxv32i16( undef, i1 false) + call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) + call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) + call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) + call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) + call @llvm.ctlz.nxv1i32( undef, i1 false) + call @llvm.ctlz.nxv2i32( undef, i1 false) + call @llvm.ctlz.nxv4i32( undef, i1 false) + call @llvm.ctlz.nxv8i32( undef, i1 false) + call @llvm.ctlz.nxv16i32( undef, i1 
false) + call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) + call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) + call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) + call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) + call @llvm.ctlz.nxv1i64( undef, i1 false) + call @llvm.ctlz.nxv2i64( undef, i1 false) + call @llvm.ctlz.nxv4i64( undef, i1 false) + call @llvm.ctlz.nxv8i64( undef, i1 false) + call @llvm.ctlz.nxv16i64( undef, i1 false) + ret void +} + +define void @cttz() { +; NOZVBB-LABEL: 'cttz' +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = call @llvm.cttz.nxv1i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call @llvm.cttz.nxv2i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %7 = call @llvm.cttz.nxv4i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %8 = call @llvm.cttz.nxv8i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %9 = call @llvm.cttz.nxv16i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %10 = call @llvm.cttz.nxv32i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %11 = call @llvm.cttz.nxv64i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for 
instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %16 = call @llvm.cttz.nxv1i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %17 = call @llvm.cttz.nxv2i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %18 = call @llvm.cttz.nxv4i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %19 = call @llvm.cttz.nxv8i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %20 = call @llvm.cttz.nxv16i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %21 = call @llvm.cttz.nxv32i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %26 = call @llvm.cttz.nxv1i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %27 = call @llvm.cttz.nxv2i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %28 = call @llvm.cttz.nxv4i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %29 = call @llvm.cttz.nxv8i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %30 = call @llvm.cttz.nxv16i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %35 = call @llvm.cttz.nxv1i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %36 = call @llvm.cttz.nxv2i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %37 = call @llvm.cttz.nxv4i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %38 = call @llvm.cttz.nxv8i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %39 = call @llvm.cttz.nxv16i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVBB-LABEL: 'cttz' +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false) +; ZVBB-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.cttz.nxv1i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.cttz.nxv2i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.cttz.nxv4i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.cttz.nxv8i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.cttz.nxv16i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.cttz.nxv32i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.cttz.nxv64i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.cttz.nxv1i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.cttz.nxv2i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.cttz.nxv4i16( undef, i1 false) +; 
ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.cttz.nxv8i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.cttz.nxv16i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call @llvm.cttz.nxv32i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.cttz.nxv1i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.cttz.nxv2i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.cttz.nxv4i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.cttz.nxv8i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.cttz.nxv16i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = 
call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.cttz.nxv1i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.cttz.nxv2i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.cttz.nxv4i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.cttz.nxv8i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.cttz.nxv16i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) + call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false) + call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false) + call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false) + call @llvm.cttz.nxv1i8( undef, i1 false) + call @llvm.cttz.nxv2i8( undef, i1 false) + call @llvm.cttz.nxv4i8( undef, i1 false) + call @llvm.cttz.nxv8i8( undef, i1 false) + call @llvm.cttz.nxv16i8( undef, i1 false) + call @llvm.cttz.nxv32i8( undef, i1 false) + call @llvm.cttz.nxv64i8( undef, i1 false) + call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) + call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false) + call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) + call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) + call @llvm.cttz.nxv1i16( undef, i1 false) + call @llvm.cttz.nxv2i16( undef, i1 false) + call @llvm.cttz.nxv4i16( undef, i1 false) + call @llvm.cttz.nxv8i16( undef, i1 false) + call @llvm.cttz.nxv16i16( undef, i1 false) + call @llvm.cttz.nxv32i16( undef, i1 false) + call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false) + call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) + call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) + call <16 x i32> 
@llvm.cttz.v16i32(<16 x i32> undef, i1 false) + call @llvm.cttz.nxv1i32( undef, i1 false) + call @llvm.cttz.nxv2i32( undef, i1 false) + call @llvm.cttz.nxv4i32( undef, i1 false) + call @llvm.cttz.nxv8i32( undef, i1 false) + call @llvm.cttz.nxv16i32( undef, i1 false) + call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) + call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) + call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) + call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) + call @llvm.cttz.nxv1i64( undef, i1 false) + call @llvm.cttz.nxv2i64( undef, i1 false) + call @llvm.cttz.nxv4i64( undef, i1 false) + call @llvm.cttz.nxv8i64( undef, i1 false) + call @llvm.cttz.nxv16i64( undef, i1 false) + ret void +} + define void @ctpop() { ; NOZVBB-LABEL: 'ctpop' ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef) @@ -778,6 +1032,86 @@ declare @llvm.vp.bswap.nxv4i64(, @llvm.vp.bswap.nxv8i64(, , i32) declare @llvm.vp.bswap.nxv16i64(, , i32) +declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) +declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>, i1) +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) +declare @llvm.ctlz.nxv1i8(, i1) +declare @llvm.ctlz.nxv2i8(, i1) +declare @llvm.ctlz.nxv4i8(, i1) +declare @llvm.ctlz.nxv8i8(, i1) +declare @llvm.ctlz.nxv16i8(, i1) +declare @llvm.ctlz.nxv32i8(, i1) +declare @llvm.ctlz.nxv64i8(, i1) +declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>, i1) +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) +declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) +declare @llvm.ctlz.nxv1i16(, i1) +declare @llvm.ctlz.nxv2i16(, i1) +declare @llvm.ctlz.nxv4i16(, i1) +declare @llvm.ctlz.nxv8i16(, i1) +declare @llvm.ctlz.nxv16i16(, i1) +declare @llvm.ctlz.nxv32i16(, i1) +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) +declare <8 x i32> 
@llvm.ctlz.v8i32(<8 x i32>, i1) +declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) +declare @llvm.ctlz.nxv1i32(, i1) +declare @llvm.ctlz.nxv2i32(, i1) +declare @llvm.ctlz.nxv4i32(, i1) +declare @llvm.ctlz.nxv8i32(, i1) +declare @llvm.ctlz.nxv16i32(, i1) +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) +declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) +declare <16 x i64> @llvm.ctlz.v16i64(<16 x i64>, i1) +declare @llvm.ctlz.nxv1i64(, i1) +declare @llvm.ctlz.nxv2i64(, i1) +declare @llvm.ctlz.nxv4i64(, i1) +declare @llvm.ctlz.nxv8i64(, i1) +declare @llvm.ctlz.nxv16i64(, i1) + +declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) +declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1) +declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1) +declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) +declare @llvm.cttz.nxv1i8(, i1) +declare @llvm.cttz.nxv2i8(, i1) +declare @llvm.cttz.nxv4i8(, i1) +declare @llvm.cttz.nxv8i8(, i1) +declare @llvm.cttz.nxv16i8(, i1) +declare @llvm.cttz.nxv32i8(, i1) +declare @llvm.cttz.nxv64i8(, i1) +declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1) +declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1) +declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) +declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1) +declare @llvm.cttz.nxv1i16(, i1) +declare @llvm.cttz.nxv2i16(, i1) +declare @llvm.cttz.nxv4i16(, i1) +declare @llvm.cttz.nxv8i16(, i1) +declare @llvm.cttz.nxv16i16(, i1) +declare @llvm.cttz.nxv32i16(, i1) +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) +declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1) +declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) +declare @llvm.cttz.nxv1i32(, i1) +declare @llvm.cttz.nxv2i32(, i1) +declare @llvm.cttz.nxv4i32(, i1) +declare @llvm.cttz.nxv8i32(, i1) +declare @llvm.cttz.nxv16i32(, i1) +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) +declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) +declare <8 x i64> @llvm.cttz.v8i64(<8 x 
i64>, i1) +declare <16 x i64> @llvm.cttz.v16i64(<16 x i64>, i1) +declare @llvm.cttz.nxv1i64(, i1) +declare @llvm.cttz.nxv2i64(, i1) +declare @llvm.cttz.nxv4i64(, i1) +declare @llvm.cttz.nxv8i64(, i1) +declare @llvm.cttz.nxv16i64(, i1) + declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32) declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32) declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32) diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index 800ea223850d3..c7cd845a0a03f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=TYPEBASED +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=TYPEBASED define void @unsupported_fp_ops( %vec, i32 %extraarg) { ; CHECK-LABEL: 'unsupported_fp_ops' @@ -1147,28 +1147,28 @@ define void @abs() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.vp.abs.nxv2i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call 
@llvm.abs.nxv2i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.vp.abs.nxv4i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv4i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.vp.abs.nxv8i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call @llvm.abs.nxv8i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'abs' @@ -1182,28 +1182,28 @@ define void @abs() { ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.vp.abs.nxv2i8( undef, i1 false, undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.abs.nxv2i8( undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%19 = call @llvm.vp.abs.nxv4i8( undef, i1 false, undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv4i8( undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.vp.abs.nxv8i8( undef, i1 false, undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call @llvm.abs.nxv8i8( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 0, <2 x i1> undef, i32 undef) @@ -2125,6 +2125,232 @@ define void @vp_fdiv(){ ret void } +define void @splat() { +; CHECK-LABEL: 'splat' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> 
undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 
undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> 
@llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %38 = call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %40 = call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %41 = call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call 
@llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %56 = call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %66 = call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %67 = call 
@llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %68 = call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %69 = call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %70 = call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %71 = call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; TYPEBASED-LABEL: 'splat' +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <8 x i8> 
@llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> 
@llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %38 = call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %40 = call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %43 = call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %56 = call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %66 = call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %67 = call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %68 = call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) +; 
TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %69 = call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %70 = call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %71 = call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) + call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) + call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) + call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) + call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) + call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) + call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) + call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) + call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) + call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) + call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) + call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) + call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) + call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) + call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 
undef, <8 x i1> undef, i32 undef) + call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) + call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) + call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) + call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) + call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) + call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) + call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) + call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) + call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) + call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) + call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) + call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) + 
call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16f16(half 
undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) + ret void +} + declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) diff --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll index 55b80350f595e..41bf88b1ec316 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll @@ -1016,45 +1016,45 @@ define void @fp16() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 17 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s1 = 
call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 90 for 
instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 176 for 
instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1069,45 +1069,45 @@ define void @fp16() { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; AVX512DQ-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: 
%v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> 
@llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> 
undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s64 = call <16 x i64> 
@llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll index 3810938a5a52f..fb1420ee34004 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll @@ -261,6 +261,50 @@ bb: ret void } +declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3)) + +; CHECK: DIVERGENT: %tmp0 = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %gep) +define amdgpu_kernel void @ds_read_b64_tr4_v2i32(ptr addrspace(3) %addr, ptr addrspace(1) %out) { +bb: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %tmp0 = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %gep) + store <2 x i32> %tmp0, ptr addrspace(1) %out, align 8 + ret void +} + +declare <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32(ptr addrspace(3)) + +; CHECK: DIVERGENT: %tmp0 = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32(ptr addrspace(3) %gep) +define amdgpu_kernel void @ds_read_b96_tr6_v3i32(ptr addrspace(3) %addr, ptr addrspace(1) %out) { +bb: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %tmp0 = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32(ptr addrspace(3) %gep) + store <3 x i32> %tmp0, ptr addrspace(1) %out, align 16 + ret void +} + +declare <2 x i32> @llvm.amdgcn.ds.read.tr8.b64.v2i32(ptr addrspace(3)) + +; CHECK: DIVERGENT: %tmp0 = call <2 x i32> @llvm.amdgcn.ds.read.tr8.b64.v2i32(ptr addrspace(3) %gep) +define amdgpu_kernel void @ds_read_b64_tr8_v2i32(ptr addrspace(3) %addr, ptr addrspace(1) %out) { +bb: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %tmp0 = call <2 x i32> 
@llvm.amdgcn.ds.read.tr8.b64.v2i32(ptr addrspace(3) %gep) + store <2 x i32> %tmp0, ptr addrspace(1) %out, align 8 + ret void +} + +declare <4 x i16> @llvm.amdgcn.ds.read.tr16.b64.v4i16(ptr addrspace(3)) + +; CHECK: DIVERGENT: %tmp0 = call <4 x i16> @llvm.amdgcn.ds.read.tr16.b64.v4i16(ptr addrspace(3) %gep) +define amdgpu_kernel void @ds_read_b64_tr_b16_v4i16(ptr addrspace(3) %addr, ptr addrspace(1) %out) { +bb: + %gep = getelementptr i64, ptr addrspace(3) %addr, i16 4 + %tmp0 = call <4 x i16> @llvm.amdgcn.ds.read.tr16.b64.v4i16(ptr addrspace(3) %gep) + store <4 x i16> %tmp0, ptr addrspace(1) %out, align 16 + ret void +} + declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half>, <8 x half>, <4 x float>, i32 immarg, i32 immarg, i32 immarg) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half>, <8 x half>, <16 x float>, i32 immarg, i32 immarg, i32 immarg) @@ -440,6 +484,22 @@ define amdgpu_kernel void @smfmac_f32_32x32x64_fp8_fp8(<4 x i32> %arg0, <8 x i32 ret void } +; CHECK: DIVERGENT: %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %src0, i32 %src1, i1 false, i1 false) +define amdgpu_kernel void @v_permlane16_swap(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %src0, i32 %src1, i1 false, i1 false) + store { i32, i32 } %v, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %src0, i32 %src1, i1 false, i1 false) +define amdgpu_kernel void @v_permlane32_swap(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %src0, i32 %src1, i1 false, i1 false) + store { i32, i32 } %v, ptr addrspace(1) %out + ret void +} + + + declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1 declare i32 @llvm.amdgcn.permlane16.i32(i32, i32, i32, i32, i1, i1) #1 declare i32 @llvm.amdgcn.permlanex16.i32(i32, i32, i32, i32, i1, i1) #1 diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index c92718f9e9b3c..2464026aa125b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -59,8 +59,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0) - ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>) + ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>) + ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x p0>) = COPY $q0 %1:_(<2 x p0>) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll index 25a14ef9a49ee..d501d9ed24547 100644 --- a/llvm/test/CodeGen/AArch64/abs.ll +++ b/llvm/test/CodeGen/AArch64/abs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; ===== Legal Scalars ===== diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll index 412c2b00a5ac0..9c54238c68e2c 100644 --- 
a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll +++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll @@ -1,78 +1,68 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL,FALLBACK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -; Function Attrs: nounwind readnone declare i32 @llvm.ctlz.i32(i32, i1) #0 declare i64 @llvm.ctlz.i64(i64, i1) #1 -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb32 define i32 @clrsb32(i32 %x) #2 { +; CHECK-LABEL: clrsb32: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls w0, w0 +; CHECK-NEXT: ret entry: %shr = ashr i32 %x, 31 %xor = xor i32 %shr, %x %mul = shl i32 %xor, 1 %add = or i32 %mul, 1 %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 false) - ret i32 %0 -; CHECK-LABEL: clrsb32 -; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]] - -; GISEL-LABEL: clrsb32 -; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]] } -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb64 define i64 @clrsb64(i64 %x) #3 { +; CHECK-LABEL: clrsb64: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls x0, x0 +; CHECK-NEXT: ret entry: %shr = ashr i64 %x, 63 %xor = xor i64 %shr, %x %mul = shl nsw i64 %xor, 1 %add = or i64 %mul, 1 %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 false) - ret i64 %0 -; CHECK-LABEL: clrsb64 -; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]] -; GISEL-LABEL: clrsb64 -; GISEL: cls [[TEMP:x[0-9]+]], [[TEMP]] } -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb32_zeroundef define i32 @clrsb32_zeroundef(i32 %x) #2 { +; CHECK-LABEL: clrsb32_zeroundef: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls w0, 
w0 +; CHECK-NEXT: ret entry: %shr = ashr i32 %x, 31 %xor = xor i32 %shr, %x %mul = shl i32 %xor, 1 %add = or i32 %mul, 1 %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 true) - ret i32 %0 -; CHECK-LABEL: clrsb32_zeroundef -; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]] - -; GISEL-LABEL: clrsb32_zeroundef -; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]] } -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb64 define i64 @clrsb64_zeroundef(i64 %x) #3 { +; CHECK-LABEL: clrsb64_zeroundef: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls x0, x0 +; CHECK-NEXT: ret entry: %shr = ashr i64 %x, 63 %xor = xor i64 %shr, %x %mul = shl nsw i64 %xor, 1 %add = or i64 %mul, 1 %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 true) - ret i64 %0 -; CHECK-LABEL: clrsb64_zeroundef -; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]] -; GISEL-LABEL: clrsb64_zeroundef -; GISEL: cls [[TEMP:x[0-9]+]], [[TEMP]] } + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll index e32d83327fe42..50df6a0388587 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ext.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0 +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) { ; CHECK-LABEL: test_vextd: diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll index 475affa358bd1..0e1e15f9b6b91 
100644 --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -1,12 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define void @testLeftGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.8b v0, v1, #3 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.8b v0, v1, #3 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood8x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.8b v2, #7 +; CHECK-GI-NEXT: shl.8b v1, v1, #3 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, %vshl_n = shl <8 x i8> %src2, %result = or <8 x i8> %and.i, %vshl_n @@ -15,14 +25,23 @@ define void @testLeftGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind } define void @testLeftBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.8b v2, #165 -; CHECK-NEXT: add.8b v1, v1, v1 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi.8b v2, #165 +; CHECK-SD-NEXT: add.8b v1, v1, v1 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad8x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.8b v2, 
#165 +; CHECK-GI-NEXT: shl.8b v1, v1, #1 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, %vshl_n = shl <8 x i8> %src2, %result = or <8 x i8> %and.i, %vshl_n @@ -31,11 +50,20 @@ define void @testLeftBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind } define void @testRightGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.8b v0, v1, #3 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.8b v0, v1, #3 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood8x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.8b v2, #224 +; CHECK-GI-NEXT: ushr.8b v1, v1, #3 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, %vshl_n = lshr <8 x i8> %src2, %result = or <8 x i8> %and.i, %vshl_n @@ -60,11 +88,20 @@ define void @testRightBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind } define void @testLeftGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood16x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.16b v0, v1, #3 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood16x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.16b v0, v1, #3 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood16x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.16b v2, #7 +; CHECK-GI-NEXT: shl.16b v1, v1, #3 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <16 x i8> %src1, %vshl_n = shl <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n @@ -73,14 +110,23 @@ define void @testLeftGood16x8(<16 x i8> %src1, <16 x 
i8> %src2, ptr %dest) nounw } define void @testLeftBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad16x8: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v2, #165 -; CHECK-NEXT: add.16b v1, v1, v1 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad16x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi.16b v2, #165 +; CHECK-SD-NEXT: add.16b v1, v1, v1 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad16x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.16b v2, #165 +; CHECK-GI-NEXT: shl.16b v1, v1, #1 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <16 x i8> %src1, %vshl_n = shl <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n @@ -89,11 +135,20 @@ define void @testLeftBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwi } define void @testRightGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood16x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.16b v0, v1, #3 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood16x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.16b v0, v1, #3 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood16x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.16b v2, #224 +; CHECK-GI-NEXT: ushr.16b v1, v1, #3 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <16 x i8> %src1, %vshl_n = lshr <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n @@ -118,11 +173,20 @@ define void @testRightBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounw } define void @testLeftGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) 
nounwind { -; CHECK-LABEL: testLeftGood4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.4h v0, v1, #14 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.4h v0, v1, #14 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.4h v2, #192, lsl #8 +; CHECK-GI-NEXT: shl.4h v1, v1, #14 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = shl <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -131,15 +195,25 @@ define void @testLeftGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounw } define void @testLeftBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; CHECK-NEXT: shl.4h v1, v1, #14 -; CHECK-NEXT: dup.4h v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: shl.4h v1, v1, #14 +; CHECK-SD-NEXT: dup.4h v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI9_0 +; CHECK-GI-NEXT: shl.4h v1, v1, #14 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI9_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = shl <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -148,11 +222,20 @@ define void @testLeftBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwi } define void @testRightGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) 
nounwind { -; CHECK-LABEL: testRightGood4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.4h v0, v1, #14 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.4h v0, v1, #14 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.4h v2, #3 +; CHECK-GI-NEXT: ushr.4h v1, v1, #14 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = lshr <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -161,14 +244,24 @@ define void @testRightGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) noun } define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; CHECK-NEXT: dup.4h v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: usra.4h v0, v1, #14 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: dup.4h v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: usra.4h v0, v1, #14 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI11_0 +; CHECK-GI-NEXT: ushr.4h v1, v1, #14 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI11_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = lshr <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -177,11 +270,20 @@ define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounw } define void @testLeftGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood8x16: -; CHECK: // %bb.0: 
-; CHECK-NEXT: sli.8h v0, v1, #14 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.8h v0, v1, #14 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.8h v2, #192, lsl #8 +; CHECK-GI-NEXT: shl.8h v1, v1, #14 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = shl <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -190,15 +292,25 @@ define void @testLeftGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounw } define void @testLeftBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad8x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; CHECK-NEXT: shl.8h v1, v1, #14 -; CHECK-NEXT: dup.8h v2, w8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: shl.8h v1, v1, #14 +; CHECK-SD-NEXT: dup.8h v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI13_0 +; CHECK-GI-NEXT: shl.8h v1, v1, #14 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = shl <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -207,11 +319,20 @@ define void @testLeftBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwi } define void @testRightGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood8x16: -; CHECK: 
// %bb.0: -; CHECK-NEXT: sri.8h v0, v1, #14 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.8h v0, v1, #14 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.8h v2, #3 +; CHECK-GI-NEXT: ushr.8h v1, v1, #14 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = lshr <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -220,14 +341,24 @@ define void @testRightGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) noun } define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad8x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; CHECK-NEXT: dup.8h v2, w8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: usra.8h v0, v1, #14 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: dup.8h v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: usra.8h v0, v1, #14 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI15_0 +; CHECK-GI-NEXT: ushr.8h v1, v1, #14 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = lshr <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -236,11 +367,20 @@ define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounw } define void @testLeftGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.2s v0, v1, #22 -; CHECK-NEXT: 
str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.2s v0, v1, #22 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.2s v2, #63, msl #16 +; CHECK-GI-NEXT: shl.2s v1, v1, #22 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = shl <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -249,15 +389,25 @@ define void @testLeftGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounw } define void @testLeftBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: shl.2s v1, v1, #22 -; CHECK-NEXT: dup.2s v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: shl.2s v1, v1, #22 +; CHECK-SD-NEXT: dup.2s v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI17_0 +; CHECK-GI-NEXT: shl.2s v1, v1, #22 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI17_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = shl <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -266,11 +416,20 @@ define void @testLeftBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwi } define void @testRightGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.2s v0, v1, #22 -; 
CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.2s v0, v1, #22 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.2s v2, #3, msl #8 +; CHECK-GI-NEXT: ushr.2s v1, v1, #22 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = lshr <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -279,15 +438,25 @@ define void @testRightGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) noun } define void @testRightBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: ushr.2s v1, v1, #22 -; CHECK-NEXT: dup.2s v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: ushr.2s v1, v1, #22 +; CHECK-SD-NEXT: dup.2s v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI19_0 +; CHECK-GI-NEXT: ushr.2s v1, v1, #22 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI19_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = lshr <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -296,11 +465,20 @@ define void @testRightBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounw } define void @testLeftGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.4s 
v0, v1, #22 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.4s v0, v1, #22 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.4s v2, #63, msl #16 +; CHECK-GI-NEXT: shl.4s v1, v1, #22 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = shl <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -309,15 +487,25 @@ define void @testLeftGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounw } define void @testLeftBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: shl.4s v1, v1, #22 -; CHECK-NEXT: dup.4s v2, w8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: shl.4s v1, v1, #22 +; CHECK-SD-NEXT: dup.4s v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-GI-NEXT: shl.4s v1, v1, #22 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = shl <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -326,11 +514,20 @@ define void @testLeftBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwi } define void @testRightGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood4x32: -; CHECK: // %bb.0: -; 
CHECK-NEXT: sri.4s v0, v1, #22 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.4s v0, v1, #22 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.4s v2, #3, msl #8 +; CHECK-GI-NEXT: ushr.4s v1, v1, #22 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = lshr <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -339,15 +536,25 @@ define void @testRightGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) noun } define void @testRightBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: ushr.4s v1, v1, #22 -; CHECK-NEXT: dup.4s v2, w8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: ushr.4s v1, v1, #22 +; CHECK-SD-NEXT: dup.4s v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI23_0 +; CHECK-GI-NEXT: ushr.4s v1, v1, #22 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = lshr <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -356,11 +563,20 @@ define void @testRightBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounw } define void @testLeftGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood2x64: 
-; CHECK: // %bb.0: -; CHECK-NEXT: sli.2d v0, v1, #48 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.2d v0, v1, #48 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.2d v2, #0x00ffffffffffff +; CHECK-GI-NEXT: shl.2d v1, v1, #48 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = shl <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -369,16 +585,26 @@ define void @testLeftGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounw } define void @testLeftBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: shl.2d v1, v1, #48 -; CHECK-NEXT: movk x8, #1, lsl #48 -; CHECK-NEXT: dup.2d v2, x8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, #10 // =0xa +; CHECK-SD-NEXT: shl.2d v1, v1, #48 +; CHECK-SD-NEXT: movk x8, #1, lsl #48 +; CHECK-SD-NEXT: dup.2d v2, x8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI25_0 +; CHECK-GI-NEXT: shl.2d v1, v1, #48 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = shl <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -387,11 +613,20 @@ define void @testLeftBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwi } define void @testRightGood2x64(<2 x i64> 
%src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.2d v0, v1, #48 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.2d v0, v1, #48 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.2d v2, #0xffffffffffff0000 +; CHECK-GI-NEXT: ushr.2d v1, v1, #48 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = lshr <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -400,16 +635,26 @@ define void @testRightGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) noun } define void @testRightBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: ushr.2d v1, v1, #48 -; CHECK-NEXT: movk x8, #1, lsl #48 -; CHECK-NEXT: dup.2d v2, x8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, #10 // =0xa +; CHECK-SD-NEXT: ushr.2d v1, v1, #48 +; CHECK-SD-NEXT: movk x8, #1, lsl #48 +; CHECK-SD-NEXT: dup.2d v2, x8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI27_0 +; CHECK-GI-NEXT: ushr.2d v1, v1, #48 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI27_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = lshr <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -418,11 +663,19 @@ define void 
@testRightBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounw } define void @testLeftShouldNotCreateSLI1x128(<1 x i128> %src1, <1 x i128> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftShouldNotCreateSLI1x128: -; CHECK: // %bb.0: -; CHECK-NEXT: bfi x1, x2, #6, #58 -; CHECK-NEXT: stp x0, x1, [x4] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftShouldNotCreateSLI1x128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: bfi x1, x2, #6, #58 +; CHECK-SD-NEXT: stp x0, x1, [x4] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftShouldNotCreateSLI1x128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov.d v0[0], x0 +; CHECK-GI-NEXT: bfi x1, x2, #6, #58 +; CHECK-GI-NEXT: mov.d v0[1], x1 +; CHECK-GI-NEXT: str q0, [x4] +; CHECK-GI-NEXT: ret %and.i = and <1 x i128> %src1, %vshl_n = shl <1 x i128> %src2, %result = or <1 x i128> %and.i, %vshl_n diff --git a/llvm/test/CodeGen/AArch64/arm64-vclz.ll b/llvm/test/CodeGen/AArch64/arm64-vclz.ll index 38c0572e23f89..c65e75c89e8da 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vclz.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vclz.ll @@ -1,154 +1,254 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; FALLBACK-NOT: remark{{.*}}test_vclz_u8 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u8: - ; CHECK: clz.8b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) 
nounwind ret <8 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s8 define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s8: - ; CHECK: clz.8b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind ret <8 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_u16 define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u16: - ; CHECK: clz.4h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind ret <4 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s16 define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s16: - ; CHECK: clz.4h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind ret <4 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_u32 define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u32: - ; CHECK: clz.2s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.2s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind ret <2 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s32 define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s32: - ; CHECK: clz.2s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.2s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind ret <2 x i32> %vclz1.i } -; 
FALLBACK-NOT: remark{{.*}}test_vclz_u64 define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u64: +; CHECK-SD-LABEL: test_vclz_u64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr d1, d0, #1 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #4 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #8 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #16 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #32 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: mvn.8b v0, v0 +; CHECK-SD-NEXT: cnt.8b v0, v0 +; CHECK-SD-NEXT: uaddlp.4h v0, v0 +; CHECK-SD-NEXT: uaddlp.2s v0, v0 +; CHECK-SD-NEXT: uaddlp.1d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclz_u64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind ret <1 x i64> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s64 define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s64: +; CHECK-SD-LABEL: test_vclz_s64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr d1, d0, #1 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #4 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #8 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #16 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #32 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: mvn.8b v0, v0 +; CHECK-SD-NEXT: cnt.8b v0, v0 +; CHECK-SD-NEXT: uaddlp.4h v0, v0 +; CHECK-SD-NEXT: uaddlp.2s v0, v0 +; CHECK-SD-NEXT: uaddlp.1d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclz_s64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; 
CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind ret <1 x i64> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u8 define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u8: - ; CHECK: clz.16b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.16b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind ret <16 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s8 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s8: - ; CHECK: clz.16b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.16b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind ret <16 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u16 define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u16: - ; CHECK: clz.8h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind ret <8 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s16 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s16: - ; CHECK: clz.8h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind ret <8 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u32 define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u32: - ; CHECK: clz.4s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_u32: +; 
CHECK: // %bb.0: +; CHECK-NEXT: clz.4s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind ret <4 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s32 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s32: - ; CHECK: clz.4s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind ret <4 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u64 define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u64: +; CHECK-SD-LABEL: test_vclzq_u64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr.2d v1, v0, #1 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #4 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #8 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #16 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #32 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: mvn.16b v0, v0 +; CHECK-SD-NEXT: cnt.16b v0, v0 +; CHECK-SD-NEXT: uaddlp.8h v0, v0 +; CHECK-SD-NEXT: uaddlp.4s v0, v0 +; CHECK-SD-NEXT: uaddlp.2d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclzq_u64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: mov.d x9, v0[1] +; CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: mov.d v0[0], x8 +; CHECK-GI-NEXT: clz x8, x9 +; CHECK-GI-NEXT: mov.d v0[1], x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind ret <2 x i64> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s64 define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s64: +; CHECK-SD-LABEL: test_vclzq_s64: +; CHECK-SD: // %bb.0: 
+; CHECK-SD-NEXT: ushr.2d v1, v0, #1 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #4 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #8 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #16 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #32 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: mvn.16b v0, v0 +; CHECK-SD-NEXT: cnt.16b v0, v0 +; CHECK-SD-NEXT: uaddlp.8h v0, v0 +; CHECK-SD-NEXT: uaddlp.4s v0, v0 +; CHECK-SD-NEXT: uaddlp.2d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclzq_s64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: mov.d x9, v0[1] +; CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: mov.d v0[0], x8 +; CHECK-GI-NEXT: clz x8, x9 +; CHECK-GI-NEXT: mov.d v0[1], x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind ret <2 x i64> %vclz1.i } declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone - declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone - declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone - declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone - declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>, i1) nounwind readnone - declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone - declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone - declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 7af7c235f9ac1..2f543cc324bc2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -1,12 +1,114 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple 
-enable-misched=false | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for sqshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
sqrshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i64_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i32_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn16b +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for sqshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback 
path for uqrshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback 
path for sli4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8 define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqshl8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqshl.8b v0, v0, v1 +; CHECK-NEXT: sqshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -19,7 +121,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqshl.4h v0, v0, v1 +; CHECK-NEXT: sqshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -32,7 +134,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqshl.2s v0, v0, v1 +; CHECK-NEXT: sqshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -97,7 +199,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqshl.8b v0, v0, v1 +; CHECK-NEXT: uqshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -110,7 +212,7 @@ define 
<4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqshl.4h v0, v0, v1 +; CHECK-NEXT: uqshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -123,7 +225,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqshl.2s v0, v0, v1 +; CHECK-NEXT: uqshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -136,7 +238,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.16b v0, v0, v1 +; CHECK-NEXT: sqshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -149,7 +251,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.8h v0, v0, v1 +; CHECK-NEXT: sqshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -162,7 +264,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.4s v0, v0, v1 +; CHECK-NEXT: sqshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -175,7 +277,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.2d v0, v0, v1 +; CHECK-NEXT: sqshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -188,7 +290,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.16b v0, v0, v1 +; CHECK-NEXT: uqshl 
v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -201,7 +303,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.8h v0, v0, v1 +; CHECK-NEXT: uqshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -214,7 +316,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.4s v0, v0, v1 +; CHECK-NEXT: uqshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -227,7 +329,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.2d v0, v0, v1 +; CHECK-NEXT: uqshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -315,7 +417,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: srshl.8b v0, v0, v1 +; CHECK-NEXT: srshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -328,7 +430,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: srshl.4h v0, v0, v1 +; CHECK-NEXT: srshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -341,7 +443,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: srshl.2s v0, v0, v1 +; CHECK-NEXT: srshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -394,10 +496,10 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind { define i64 
@srshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: srshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: srshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -411,7 +513,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: urshl.8b v0, v0, v1 +; CHECK-NEXT: urshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -424,7 +526,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: urshl.4h v0, v0, v1 +; CHECK-NEXT: urshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -437,7 +539,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: urshl.2s v0, v0, v1 +; CHECK-NEXT: urshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -490,10 +592,10 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind { define i64 @urshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: urshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -507,7 +609,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.16b v0, v0, v1 +; CHECK-NEXT: 
srshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -520,7 +622,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.8h v0, v0, v1 +; CHECK-NEXT: srshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -533,7 +635,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.4s v0, v0, v1 +; CHECK-NEXT: srshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -546,7 +648,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.2d v0, v0, v1 +; CHECK-NEXT: srshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -559,7 +661,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.16b v0, v0, v1 +; CHECK-NEXT: urshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -572,7 +674,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.8h v0, v0, v1 +; CHECK-NEXT: urshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -585,7 +687,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.4s v0, v0, v1 +; CHECK-NEXT: urshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -598,7 +700,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind { 
; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.2d v0, v0, v1 +; CHECK-NEXT: urshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -633,7 +735,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrshl.8b v0, v0, v1 +; CHECK-NEXT: sqrshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -646,7 +748,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrshl.4h v0, v0, v1 +; CHECK-NEXT: sqrshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -659,7 +761,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrshl.2s v0, v0, v1 +; CHECK-NEXT: sqrshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -672,7 +774,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqrshl.8b v0, v0, v1 +; CHECK-NEXT: uqrshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -685,7 +787,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqrshl.4h v0, v0, v1 +; CHECK-NEXT: uqrshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -698,7 +800,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqrshl.2s v0, v0, v1 +; CHECK-NEXT: uqrshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = 
load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -711,7 +813,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.16b v0, v0, v1 +; CHECK-NEXT: sqrshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -724,7 +826,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.8h v0, v0, v1 +; CHECK-NEXT: sqrshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -737,7 +839,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.4s v0, v0, v1 +; CHECK-NEXT: sqrshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -750,7 +852,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.2d v0, v0, v1 +; CHECK-NEXT: sqrshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -803,10 +905,10 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind { define i64 @sqrshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: sqrshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -820,7 +922,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.16b v0, v0, v1 +; CHECK-NEXT: uqrshl v0.16b, v0.16b, 
v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -833,7 +935,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.8h v0, v0, v1 +; CHECK-NEXT: uqrshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -846,7 +948,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.4s v0, v0, v1 +; CHECK-NEXT: uqrshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -859,7 +961,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.2d v0, v0, v1 +; CHECK-NEXT: uqrshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -912,10 +1014,10 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind { define i64 @uqrshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: uqrshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -947,77 +1049,126 @@ declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @urshr8b(ptr %A) nounwind { -; CHECK-LABEL: urshr8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: urshr.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: urshr v0.8b, v0.8b, #1 +; 
CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @urshr4h(ptr %A) nounwind { -; CHECK-LABEL: urshr4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: urshr.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: urshr v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @urshr2s(ptr %A) nounwind { -; CHECK-LABEL: urshr2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: urshr.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: urshr v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @urshr16b(ptr %A) nounwind { -; CHECK-LABEL: urshr16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: 
urshr16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @urshr8h(ptr %A) nounwind { -; CHECK-LABEL: urshr8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @urshr4s(ptr %A) nounwind { -; CHECK-LABEL: urshr4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @urshr2d(ptr %A) nounwind { -; CHECK-LABEL: urshr2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr2d: +; CHECK-GI: // %bb.0: 
+; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 @@ -1047,77 +1198,126 @@ define i64 @urshr_scalar(ptr %A) nounwind { } define <8 x i8> @srshr8b(ptr %A) nounwind { -; CHECK-LABEL: srshr8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: srshr.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: srshr v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @srshr4h(ptr %A) nounwind { -; CHECK-LABEL: srshr4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: srshr.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: srshr v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @srshr2s(ptr %A) nounwind { -; CHECK-LABEL: srshr2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: srshr.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: srshr v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr2s: +; 
CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @srshr16b(ptr %A) nounwind { -; CHECK-LABEL: srshr16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @srshr8h(ptr %A) nounwind { -; CHECK-LABEL: srshr8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @srshr4s(ptr %A) nounwind { -; CHECK-LABEL: srshr4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr4s: +; CHECK-GI: // %bb.0: +; 
CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @srshr2d(ptr %A) nounwind { -; CHECK-LABEL: srshr2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 @@ -1150,7 +1350,7 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind { ; CHECK-LABEL: sqshlu8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshlu.8b v0, v0, #1 +; CHECK-NEXT: sqshlu v0.8b, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) @@ -1161,7 +1361,7 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind { ; CHECK-LABEL: sqshlu4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshlu.4h v0, v0, #1 +; CHECK-NEXT: sqshlu v0.4h, v0.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) @@ -1172,7 +1372,7 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind { ; CHECK-LABEL: sqshlu2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshlu.2s v0, v0, #1 +; CHECK-NEXT: sqshlu v0.2s, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) @@ -1183,7 +1383,7 @@ define <16 
x i8> @sqshlu16b(ptr %A) nounwind { ; CHECK-LABEL: sqshlu16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.16b v0, v0, #1 +; CHECK-NEXT: sqshlu v0.16b, v0.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) @@ -1194,7 +1394,7 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind { ; CHECK-LABEL: sqshlu8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.8h v0, v0, #1 +; CHECK-NEXT: sqshlu v0.8h, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) @@ -1205,7 +1405,7 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind { ; CHECK-LABEL: sqshlu4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.4s v0, v0, #1 +; CHECK-NEXT: sqshlu v0.4s, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) @@ -1216,7 +1416,7 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind { ; CHECK-LABEL: sqshlu2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.2d v0, v0, #1 +; CHECK-NEXT: sqshlu v0.2d, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) @@ -1275,7 +1475,7 @@ define <8 x i8> @rshrn8b(ptr %A) nounwind { ; CHECK-LABEL: rshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: rshrn.8b v0, v0, #1 +; CHECK-NEXT: rshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1286,7 +1486,7 @@ define <4 x i16> @rshrn4h(ptr %A) nounwind { ; CHECK-LABEL: rshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: rshrn.4h v0, v0, #1 +; CHECK-NEXT: rshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> 
@llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1297,7 +1497,7 @@ define <2 x i32> @rshrn2s(ptr %A) nounwind { ; CHECK-LABEL: rshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: rshrn.2s v0, v0, #1 +; CHECK-NEXT: rshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1309,7 +1509,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: rshrn2.16b v0, v1, #1 +; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1323,7 +1523,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: rshrn2.8h v0, v1, #1 +; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1337,7 +1537,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: rshrn2.4s v0, v1, #1 +; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1354,7 +1554,7 @@ define <8 x i8> @shrn8b(ptr %A) nounwind { ; CHECK-LABEL: shrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: shrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = lshr <8 x i16> %tmp1, @@ -1366,7 +1566,7 @@ define <4 x i16> @shrn4h(ptr %A) nounwind { ; CHECK-LABEL: shrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shrn.4h v0, v0, #1 +; CHECK-NEXT: shrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = lshr <4 x i32> %tmp1, @@ -1378,7 +1578,7 @@ define <2 x i32> @shrn2s(ptr %A) nounwind { ; CHECK-LABEL: shrn2s: ; CHECK: // %bb.0: ; 
CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shrn.2s v0, v0, #1 +; CHECK-NEXT: shrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = lshr <2 x i64> %tmp1, @@ -1391,7 +1591,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: shrn2.16b v0, v1, #1 +; CHECK-NEXT: shrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1406,7 +1606,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: shrn2.8h v0, v1, #1 +; CHECK-NEXT: shrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1421,7 +1621,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: shrn2.4s v0, v1, #1 +; CHECK-NEXT: shrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1450,7 +1650,7 @@ define <8 x i8> @sqshrn8b(ptr %A) nounwind { ; CHECK-LABEL: sqshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrn.8b v0, v0, #1 +; CHECK-NEXT: sqshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1461,7 +1661,7 @@ define <4 x i16> @sqshrn4h(ptr %A) nounwind { ; CHECK-LABEL: sqshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrn.4h v0, v0, #1 +; CHECK-NEXT: sqshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1472,7 +1672,7 @@ define <2 x i32> @sqshrn2s(ptr %A) nounwind { ; CHECK-LABEL: sqshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrn.2s v0, v0, #1 +; CHECK-NEXT: sqshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret 
%tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1485,7 +1685,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrn2.16b v0, v1, #1 +; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1499,7 +1699,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrn2.8h v0, v1, #1 +; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1513,7 +1713,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrn2.4s v0, v1, #1 +; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1542,7 +1742,7 @@ define <8 x i8> @sqshrun8b(ptr %A) nounwind { ; CHECK-LABEL: sqshrun8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrun.8b v0, v0, #1 +; CHECK-NEXT: sqshrun v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) @@ -1553,7 +1753,7 @@ define <4 x i16> @sqshrun4h(ptr %A) nounwind { ; CHECK-LABEL: sqshrun4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrun.4h v0, v0, #1 +; CHECK-NEXT: sqshrun v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) @@ -1564,7 +1764,7 @@ define <2 x i32> @sqshrun2s(ptr %A) nounwind { ; CHECK-LABEL: sqshrun2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrun.2s v0, v0, #1 +; CHECK-NEXT: sqshrun v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A 
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) @@ -1576,7 +1776,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrun2.16b v0, v1, #1 +; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1590,7 +1790,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrun2.8h v0, v1, #1 +; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1604,7 +1804,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrun2.4s v0, v1, #1 +; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1633,7 +1833,7 @@ define <8 x i8> @sqrshrn8b(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrn.8b v0, v0, #1 +; CHECK-NEXT: sqrshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1644,7 +1844,7 @@ define <4 x i16> @sqrshrn4h(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrn.4h v0, v0, #1 +; CHECK-NEXT: sqrshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1655,7 +1855,7 @@ define <2 x i32> @sqrshrn2s(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrn.2s v0, v0, #1 +; CHECK-NEXT: sqrshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> 
@llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1667,7 +1867,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrn2.16b v0, v1, #1 +; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1681,7 +1881,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrn2.8h v0, v1, #1 +; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1695,7 +1895,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrn2.4s v0, v1, #1 +; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1724,7 +1924,7 @@ define <8 x i8> @sqrshrun8b(ptr %A) nounwind { ; CHECK-LABEL: sqrshrun8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrun.8b v0, v0, #1 +; CHECK-NEXT: sqrshrun v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) @@ -1735,7 +1935,7 @@ define <4 x i16> @sqrshrun4h(ptr %A) nounwind { ; CHECK-LABEL: sqrshrun4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrun.4h v0, v0, #1 +; CHECK-NEXT: sqrshrun v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) @@ -1746,7 +1946,7 @@ define <2 x i32> @sqrshrun2s(ptr %A) nounwind { ; CHECK-LABEL: sqrshrun2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrun.2s v0, v0, #1 +; CHECK-NEXT: sqrshrun v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> 
@llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) @@ -1758,7 +1958,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrun2.16b v0, v1, #1 +; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1772,7 +1972,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrun2.8h v0, v1, #1 +; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1786,7 +1986,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrun2.4s v0, v1, #1 +; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1815,7 +2015,7 @@ define <8 x i8> @uqrshrn8b(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqrshrn.8b v0, v0, #1 +; CHECK-NEXT: uqrshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1826,7 +2026,7 @@ define <4 x i16> @uqrshrn4h(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqrshrn.4h v0, v0, #1 +; CHECK-NEXT: uqrshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1837,7 +2037,7 @@ define <2 x i32> @uqrshrn2s(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqrshrn.2s v0, v0, #1 +; CHECK-NEXT: uqrshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> 
@llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1849,7 +2049,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshrn2.16b v0, v1, #1 +; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1863,7 +2063,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshrn2.8h v0, v1, #1 +; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1877,7 +2077,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshrn2.4s v0, v1, #1 +; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1906,7 +2106,7 @@ define <8 x i8> @uqshrn8b(ptr %A) nounwind { ; CHECK-LABEL: uqshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshrn.8b v0, v0, #1 +; CHECK-NEXT: uqshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1917,7 +2117,7 @@ define <4 x i16> @uqshrn4h(ptr %A) nounwind { ; CHECK-LABEL: uqshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshrn.4h v0, v0, #1 +; CHECK-NEXT: uqshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1928,7 +2128,7 @@ define <2 x i32> @uqshrn2s(ptr %A) nounwind { ; CHECK-LABEL: uqshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshrn.2s v0, v0, #1 +; CHECK-NEXT: uqshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x 
i64> %tmp1, i32 1) @@ -1940,7 +2140,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshrn2.16b v0, v1, #1 +; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1954,7 +2154,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshrn2.8h v0, v1, #1 +; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1968,7 +2168,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshrn2.4s v0, v1, #1 +; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1986,7 +2186,7 @@ define <8 x i16> @ushll8h(ptr %A) nounwind { ; CHECK-LABEL: ushll8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #1 +; CHECK-NEXT: ushll v0.8h, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -1998,7 +2198,7 @@ define <4 x i32> @ushll4s(ptr %A) nounwind { ; CHECK-LABEL: ushll4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.4s v0, v0, #1 +; CHECK-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> @@ -2010,7 +2210,7 @@ define <2 x i64> @ushll2d(ptr %A) nounwind { ; CHECK-LABEL: ushll2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.2d v0, v0, #1 +; CHECK-NEXT: ushll v0.2d, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> @@ -2019,11 +2219,18 @@ define <2 x i64> @ushll2d(ptr %A) nounwind { } define <8 x i16> @ushll2_8h(ptr %A) nounwind { -; CHECK-LABEL: ushll2_8h: -; 
CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ushll.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushll2_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushll2_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #1 +; CHECK-GI-NEXT: ret %load1 = load <16 x i8>, ptr %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -2032,11 +2239,18 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind { } define <4 x i32> @ushll2_4s(ptr %A) nounwind { -; CHECK-LABEL: ushll2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ushll.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushll2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushll2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> @@ -2045,11 +2259,18 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind { } define <2 x i64> @ushll2_2d(ptr %A) nounwind { -; CHECK-LABEL: ushll2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ushll.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushll2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushll2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 
= zext <2 x i32> %tmp1 to <2 x i64> @@ -2064,24 +2285,32 @@ declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>) declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>) declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64) -define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll8h_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #1 -; CHECK-NEXT: ret +define <8 x i16> @neon_ushll8h_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll8h_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll8h_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.8h, #1 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushl v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> ) ret <8 x i16> %tmp3 } -define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl8h_no_constant_shift: +define <8 x i16> @neon_ushl8h_no_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_ushl8h_no_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushl.8h v0, v0, v0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushl v0.8h, v0.8h, v0.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -2089,36 +2318,76 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind { ret <8 x i16> %tmp3 } -define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #1 -; 
CHECK-NEXT: ret +define <4 x i32> @neon_ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushl8h_constant_shift_extend_not_2x: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushl8h_constant_shift_extend_not_2x: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: uxtb w8, w8 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v1.b[3] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: fmov w10, s3 +; CHECK-GI-NEXT: fmov w11, s4 +; CHECK-GI-NEXT: uxtb w9, w9 +; CHECK-GI-NEXT: uxtb w10, w10 +; CHECK-GI-NEXT: uxtb w11, w11 +; CHECK-GI-NEXT: fmov s2, w9 +; CHECK-GI-NEXT: mov v1.h[1], w10 +; CHECK-GI-NEXT: mov v2.h[1], w11 +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i8>, ptr %A %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } -define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl8_noext_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: add.8h v0, v0, v0 -; CHECK-NEXT: ret +define <8 x i16> @neon_ushl8_noext_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushl8_noext_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushl8_noext_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8h, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ushl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr 
%A %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } -define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll4s_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushll4s_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll4s_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll4s_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.4s, #1 +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -2126,13 +2395,21 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind { } ; FIXME: unnecessary ushll.4s v0, v0, #0? 
-define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll4s_neg_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushr.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushll4s_neg_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll4s_neg_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll4s_neg_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -2140,35 +2417,52 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind { } ; FIXME: should be constant folded. 
-define <4 x i32> @neon.ushll4s_constant_fold() nounwind { -; CHECK-LABEL: neon.ushll4s_constant_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI160_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI160_0] -; CHECK-NEXT: add.4s v0, v0, v0 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushll4s_constant_fold() nounwind { +; CHECK-SD-LABEL: neon_ushll4s_constant_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI160_0 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI160_0] +; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll4s_constant_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: adrp x8, .LCPI160_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0] +; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) ret <4 x i32> %tmp3 } -define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll2d_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.2d v0, v0, #1 -; CHECK-NEXT: ret +define <2 x i64> @neon_ushll2d_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll2d_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll2d_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: adrp x8, .LCPI161_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } -define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl_vscalar_constant_shift: +define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) 
nounwind { +; CHECK-LABEL: neon_ushl_vscalar_constant_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: zip1.2s v0, v0, v1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i32>, ptr %A @@ -2177,8 +2471,8 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind { ret <1 x i64> %tmp3 } -define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl_scalar_constant_shift: +define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_ushl_scalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 @@ -2195,7 +2489,7 @@ define <8 x i16> @sshll8h(ptr %A) nounwind { ; CHECK-LABEL: sshll8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #1 +; CHECK-NEXT: sshll v0.8h, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> @@ -2207,7 +2501,7 @@ define <2 x i64> @sshll2d(ptr %A) nounwind { ; CHECK-LABEL: sshll2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.2d v0, v0, #1 +; CHECK-NEXT: sshll v0.2d, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> @@ -2222,85 +2516,156 @@ declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>) declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>) declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64) -define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl16b_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: add.16b v0, v0, v0 -; CHECK-NEXT: ret +define <16 x i8> @neon_sshl16b_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl16b_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; 
CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl16b_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.16b, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } -define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl16b_non_splat_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI167_0 -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI167_0] -; CHECK-NEXT: sshl.16b v0, v0, v1 -; CHECK-NEXT: ret +define <16 x i8> @neon_sshl16b_non_splat_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl16b_non_splat_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI167_0 +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI167_0] +; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl16b_non_splat_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI167_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI167_0] +; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } -define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl16b_neg_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sshr.16b v0, v0, #2 -; CHECK-NEXT: ret +define <16 x i8> @neon_sshl16b_neg_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl16b_neg_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl16b_neg_constant_shift: +; CHECK-GI: // 
%bb.0: +; CHECK-GI-NEXT: movi v0.16b, #254 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } -define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll8h_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #1 -; CHECK-NEXT: ret +define <8 x i16> @neon_sshll8h_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll8h_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll8h_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.8h, #1 +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> ) ret <8 x i16> %tmp3 } -define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl4s_wrong_ext_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl4s_wrong_ext_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: sxtb w8, w8 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, 
v1.b[3] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: fmov w10, s3 +; CHECK-GI-NEXT: fmov w11, s4 +; CHECK-GI-NEXT: sxtb w9, w9 +; CHECK-GI-NEXT: sxtb w10, w10 +; CHECK-GI-NEXT: sxtb w11, w11 +; CHECK-GI-NEXT: fmov s2, w9 +; CHECK-GI-NEXT: mov v1.h[1], w10 +; CHECK-GI-NEXT: mov v2.h[1], w11 +; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i8>, ptr %A %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } -define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll4s_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshll4s_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll4s_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll4s_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.4s, #1 +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } -define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll4s_neg_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshr.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshll4s_neg_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll4s_neg_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: 
sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll4s_neg_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -2308,46 +2673,70 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind { } ; FIXME: should be constant folded. -define <4 x i32> @neon.sshl4s_constant_fold() nounwind { -; CHECK-LABEL: neon.sshl4s_constant_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI173_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI173_0] -; CHECK-NEXT: shl.4s v0, v0, #2 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshl4s_constant_fold() nounwind { +; CHECK-SD-LABEL: neon_sshl4s_constant_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI173_0 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI173_0] +; CHECK-SD-NEXT: shl v0.4s, v0.4s, #2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl4s_constant_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #2 +; CHECK-GI-NEXT: adrp x8, .LCPI173_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI173_0] +; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) ret <4 x i32> %tmp3 } -define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl4s_no_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: add.4s v0, v0, v0 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshl4s_no_fold(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl4s_no_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl4s_no_fold: +; CHECK-GI: // %bb.0: 
+; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } -define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll2d_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.2d v0, v0, #1 -; CHECK-NEXT: ret +define <2 x i64> @neon_sshll2d_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll2d_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll2d_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: adrp x8, .LCPI175_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI175_0] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } -define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll_vscalar_constant_shift: +define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_sshll_vscalar_constant_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: zip1.2s v0, v0, v1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i32>, ptr %A @@ -2356,8 +2745,8 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind { ret <1 x i64> %tmp3 } -define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll_scalar_constant_shift: +define i64 
@neon_sshll_scalar_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_sshll_scalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 @@ -2370,8 +2759,8 @@ define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind { ret i64 %tmp3 } -define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1: +define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind { +; CHECK-LABEL: neon_sshll_scalar_constant_shift_m1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 @@ -2385,34 +2774,58 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind { } ; FIXME: should be constant folded. -define <2 x i64> @neon.sshl2d_constant_fold() nounwind { -; CHECK-LABEL: neon.sshl2d_constant_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI179_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI179_0] -; CHECK-NEXT: add.2d v0, v0, v0 -; CHECK-NEXT: ret +define <2 x i64> @neon_sshl2d_constant_fold() nounwind { +; CHECK-SD-LABEL: neon_sshl2d_constant_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI179_0 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI179_0] +; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl2d_constant_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI179_1 +; CHECK-GI-NEXT: adrp x9, .LCPI179_0 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI179_1] +; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI179_0] +; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) ret <2 x i64> %tmp3 } -define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl2d_no_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shl.2d v0, v0, #2 -; CHECK-NEXT: ret +define <2 x i64> @neon_sshl2d_no_fold(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl2d_no_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr 
q0, [x0] +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl2d_no_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI180_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI180_0] +; CHECK-GI-NEXT: sshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp2 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i16> @sshll2_8h(ptr %A) nounwind { -; CHECK-LABEL: sshll2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: sshll.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshll2_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshll2_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #1 +; CHECK-GI-NEXT: ret %load1 = load <16 x i8>, ptr %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> @@ -2421,11 +2834,18 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind { } define <4 x i32> @sshll2_4s(ptr %A) nounwind { -; CHECK-LABEL: sshll2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshll2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshll2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> @@ -2434,11 +2854,18 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind { } define <2 x i64> @sshll2_2d(ptr %A) nounwind { -; CHECK-LABEL: sshll2_2d: -; CHECK: // 
%bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: sshll.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshll2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshll2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #1 +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> @@ -2447,88 +2874,145 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind { } define <8 x i8> @sqshli8b(ptr %A) nounwind { -; CHECK-LABEL: sqshli8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshl.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sqshl v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8b, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: sqshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @sqshli4h(ptr %A) nounwind { -; CHECK-LABEL: sqshli4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshl.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sqshl v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4h, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: sqshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @sqshli2s(ptr %A) nounwind { -; CHECK-LABEL: sqshli2s: -; CHECK: 
// %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshl.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sqshl v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: sqshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @sqshli16b(ptr %A) nounwind { -; CHECK-LABEL: sqshli16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.16b, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sqshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @sqshli8h(ptr %A) nounwind { -; CHECK-LABEL: sqshli8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8h, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sqshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @sqshli4s(ptr %A) nounwind { -; CHECK-LABEL: sqshli4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.4s v0, 
v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sqshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @sqshli2d(ptr %A) nounwind { -; CHECK-LABEL: sqshli2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI190_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI190_0] +; CHECK-GI-NEXT: sqshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i8> @uqshli8b(ptr %A) nounwind { -; CHECK-LABEL: uqshli8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: uqshl v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8b, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: uqshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 @@ -2537,9 +3021,9 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind { define <8 x i8> @uqshli8b_1(ptr %A) nounwind { ; CHECK-LABEL: uqshli8b_1: ; CHECK: // %bb.0: -; CHECK-NEXT: 
movi.8b v1, #8 -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.8b v0, v0, v1 +; CHECK-NEXT: movi v0.8b, #8 +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqshl v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) @@ -2547,78 +3031,130 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind { } define <4 x i16> @uqshli4h(ptr %A) nounwind { -; CHECK-LABEL: uqshli4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: uqshl v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4h, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: uqshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @uqshli2s(ptr %A) nounwind { -; CHECK-LABEL: uqshli2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: uqshl v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: uqshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @uqshli16b(ptr %A) nounwind { -; CHECK-LABEL: uqshli16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; 
CHECK-GI-LABEL: uqshli16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.16b, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: uqshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @uqshli8h(ptr %A) nounwind { -; CHECK-LABEL: uqshli8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8h, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: uqshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @uqshli4s(ptr %A) nounwind { -; CHECK-LABEL: uqshli4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: uqshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @uqshli2d(ptr %A) nounwind { -; CHECK-LABEL: uqshli2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp 
x8, .LCPI198_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI198_0] +; CHECK-GI-NEXT: uqshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ursra.8b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: ursra v0.8b, v1.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>, ptr %B @@ -2627,12 +3163,21 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind { } define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ursra.4h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: ursra v0.4h, v1.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>, ptr %B @@ -2641,12 +3186,21 @@ define 
<4 x i16> @ursra4h(ptr %A, ptr %B) nounwind { } define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ursra.2s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: ursra v0.2s, v1.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>, ptr %B @@ -2655,12 +3209,21 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind { } define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ursra.16b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.16b, v1.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>, ptr %B @@ -2669,12 +3232,21 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind { } define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: 
ursra.8h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.8h, v1.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>, ptr %B @@ -2683,12 +3255,21 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind { } define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ursra.4s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.4s, v1.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>, ptr %B @@ -2697,12 +3278,21 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ursra.2d v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.2d, v1.2d, #1 +; CHECK-SD-NEXT: ret +; +; 
CHECK-GI-LABEL: ursra2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>, ptr %B @@ -2740,12 +3330,21 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind { } define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: srsra.8b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: srsra v0.8b, v1.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>, ptr %B @@ -2754,12 +3353,21 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind { } define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: srsra.4h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: srsra v0.4h, v1.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; 
CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>, ptr %B @@ -2768,12 +3376,21 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind { } define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: srsra.2s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: srsra v0.2s, v1.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>, ptr %B @@ -2782,12 +3399,21 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind { } define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.16b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.16b, v1.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>, ptr %B @@ -2796,12 +3422,21 @@ define <16 x i8> @srsra16b(ptr %A, 
ptr %B) nounwind { } define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.8h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.8h, v1.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>, ptr %B @@ -2810,12 +3445,21 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind { } define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.4s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.4s, v1.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>, ptr %B @@ -2824,12 +3468,21 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.2d v0, v1, #1 -; CHECK-NEXT: ret +; 
CHECK-SD-LABEL: srsra2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.2d, v1.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>, ptr %B @@ -2871,7 +3524,7 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra.8b v0, v1, #1 +; CHECK-NEXT: usra v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = lshr <8 x i8> %tmp1, @@ -2885,7 +3538,7 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra.4h v0, v1, #1 +; CHECK-NEXT: usra v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = lshr <4 x i16> %tmp1, @@ -2899,7 +3552,7 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra.2s v0, v1, #1 +; CHECK-NEXT: usra v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = lshr <2 x i32> %tmp1, @@ -2913,7 +3566,7 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.16b v0, v1, #1 +; CHECK-NEXT: usra v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = lshr <16 x i8> %tmp1, @@ -2927,7 +3580,7 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.8h v0, v1, #1 +; CHECK-NEXT: usra v0.8h, v1.8h, #1 
; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = lshr <8 x i16> %tmp1, @@ -2941,7 +3594,7 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.4s v0, v1, #1 +; CHECK-NEXT: usra v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = lshr <4 x i32> %tmp1, @@ -2955,7 +3608,7 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.2d v0, v1, #1 +; CHECK-NEXT: usra v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = lshr <2 x i64> %tmp1, @@ -2965,12 +3618,20 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind { } define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: usra1d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra d0, d1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: usra1d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: usra d0, d1, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: usra1d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr x8, [x0] +; CHECK-GI-NEXT: ldr x9, [x1] +; CHECK-GI-NEXT: add x8, x9, x8, lsr #1 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = lshr <1 x i64> %tmp1, %tmp4 = load <1 x i64>, ptr %B @@ -2983,7 +3644,7 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ssra.8b v0, v1, #1 +; CHECK-NEXT: ssra v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = ashr <8 x i8> %tmp1, @@ -2997,7 +3658,7 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ssra.4h v0, v1, #1 +; CHECK-NEXT: ssra v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = ashr <4 x 
i16> %tmp1, @@ -3011,7 +3672,7 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ssra.2s v0, v1, #1 +; CHECK-NEXT: ssra v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = ashr <2 x i32> %tmp1, @@ -3025,7 +3686,7 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.16b v0, v1, #1 +; CHECK-NEXT: ssra v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = ashr <16 x i8> %tmp1, @@ -3039,7 +3700,7 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.8h v0, v1, #1 +; CHECK-NEXT: ssra v0.8h, v1.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = ashr <8 x i16> %tmp1, @@ -3053,7 +3714,7 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.4s v0, v1, #1 +; CHECK-NEXT: ssra v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = ashr <4 x i32> %tmp1, @@ -3067,7 +3728,7 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.2d v0, v1, #1 +; CHECK-NEXT: ssra v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = ashr <2 x i64> %tmp1, @@ -3081,8 +3742,8 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushr.8b v0, v0, #1 -; CHECK-NEXT: orr.8b v0, v0, v1 +; CHECK-NEXT: ushr v0.8b, v0.8b, #1 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp4 = load <8 x i8>, ptr %B @@ -3096,8 +3757,8 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; 
CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushr.4h v0, v0, #1 -; CHECK-NEXT: orr.8b v0, v0, v1 +; CHECK-NEXT: ushr v0.4h, v0.4h, #1 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp4 = load <4 x i16>, ptr %B @@ -3111,8 +3772,8 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushr.2s v0, v0, #1 -; CHECK-NEXT: orr.8b v0, v0, v1 +; CHECK-NEXT: ushr v0.2s, v0.2s, #1 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp4 = load <2 x i32>, ptr %B @@ -3126,8 +3787,8 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.16b v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr v0.16b, v0.16b, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp4 = load <16 x i8>, ptr %B @@ -3141,8 +3802,8 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.8h v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr v0.8h, v0.8h, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp4 = load <8 x i16>, ptr %B @@ -3156,8 +3817,8 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.4s v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr v0.4s, v0.4s, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp4 = load <4 x i32>, ptr %B @@ -3171,8 +3832,8 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.2d v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr 
v0.2d, v0.2d, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp4 = load <2 x i64>, ptr %B @@ -3182,13 +3843,21 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind { } define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.8b v0, v0, v0 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: shl v0.8b, v0.8b, #1 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp4 = load <8 x i8>, ptr %B %tmp3 = shl <8 x i8> %tmp1, @@ -3197,13 +3866,21 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind { } define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.4h v0, v0, v0 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h +; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: shl v0.4h, v0.4h, #1 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp4 = load <4 x i16>, ptr %B %tmp3 = shl <4 x i16> %tmp1, @@ -3212,13 +3889,21 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind { } define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: 
shl_orr2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.2s v0, v0, v0 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: add v0.2s, v0.2s, v0.2s +; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: shl v0.2s, v0.2s, #1 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp4 = load <2 x i32>, ptr %B %tmp3 = shl <2 x i32> %tmp1, @@ -3227,13 +3912,21 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind { } define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.16b v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: shl v0.16b, v0.16b, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp4 = load <16 x i8>, ptr %B %tmp3 = shl <16 x i8> %tmp1, @@ -3242,13 +3935,21 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind { } define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.8h v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; 
CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: shl v0.8h, v0.8h, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp4 = load <8 x i16>, ptr %B %tmp3 = shl <8 x i16> %tmp1, @@ -3257,13 +3958,21 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind { } define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.4s v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp4 = load <4 x i32>, ptr %B %tmp3 = shl <4 x i32> %tmp1, @@ -3272,13 +3981,21 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.2d v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; 
CHECK-GI-NEXT: shl v0.2d, v0.2d, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp4 = load <2 x i64>, ptr %B %tmp3 = shl <2 x i64> %tmp1, @@ -3287,20 +4004,32 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind { } define <8 x i16> @shll(<8 x i8> %in) { -; CHECK-LABEL: shll: -; CHECK: // %bb.0: -; CHECK-NEXT: shll.8h v0, v0, #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shll: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: shll v0.8h, v0.8b, #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shll: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8 +; CHECK-GI-NEXT: ret %ext = zext <8 x i8> %in to <8 x i16> %res = shl <8 x i16> %ext, ret <8 x i16> %res } define <4 x i32> @shll_high(<8 x i16> %in) { -; CHECK-LABEL: shll_high: -; CHECK: // %bb.0: -; CHECK-NEXT: shll2.4s v0, v0, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shll_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: shll2 v0.4s, v0.8h, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shll_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #16 +; CHECK-GI-NEXT: ret %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> %ext = zext <4 x i16> %extract to <4 x i32> %res = shl <4 x i32> %ext, @@ -3312,7 +4041,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sli.8b v0, v1, #1 +; CHECK-NEXT: sli v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -3325,7 +4054,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sli.4h v0, v1, #1 +; CHECK-NEXT: sli v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -3338,7 +4067,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: 
; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sli.2s v0, v1, #1 +; CHECK-NEXT: sli v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -3364,7 +4093,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.16b v0, v1, #1 +; CHECK-NEXT: sli v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -3377,7 +4106,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.8h v0, v1, #1 +; CHECK-NEXT: sli v0.8h, v1.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -3390,7 +4119,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.4s v0, v1, #1 +; CHECK-NEXT: sli v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -3403,7 +4132,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.2d v0, v1, #1 +; CHECK-NEXT: sli v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -3422,21 +4151,37 @@ declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounw declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: ashr_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg d1, d1 -; CHECK-NEXT: sshl d0, d0, d1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ashr_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg d1, d1 +; CHECK-SD-NEXT: sshl d0, d0, d1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ashr_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; 
CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: asr x8, x8, x9 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %c = ashr <1 x i64> %a, %b ret <1 x i64> %c } define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: sqshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: sqshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3445,11 +4190,19 @@ entry: } define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: uqshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: uqshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3458,11 +4211,19 @@ 
entry: } define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: srshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: srshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3471,11 +4232,19 @@ entry: } define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: urshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: urshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3486,8 +4255,8 @@ entry: define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { ; CHECK-LABEL: sqshlu_zero_shift_amount: ; CHECK: // %bb.0: // 
%entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: sqshlu.2d v0, v0, #0 +; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-NEXT: sqshlu v0.2d, v0.2d, #0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -3498,11 +4267,19 @@ entry: } define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: sshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3511,11 +4288,19 @@ entry: } define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: ushl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x 
i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3526,8 +4311,8 @@ entry: define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) { ; CHECK-LABEL: sext_rshrn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rshrn.4h v0, v0, #13 -; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: rshrn v0.4h, v0.4s, #13 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: ret entry: %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13) @@ -3538,8 +4323,8 @@ entry: define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) { ; CHECK-LABEL: zext_rshrn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rshrn.4h v0, v0, #13 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: rshrn v0.4h, v0.4s, #13 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ret entry: %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13) @@ -3550,9 +4335,9 @@ entry: define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) { ; CHECK-LABEL: mul_rshrn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi.4s v1, #3 -; CHECK-NEXT: add.4s v0, v0, v1 -; CHECK-NEXT: rshrn.4h v0, v0, #13 +; CHECK-NEXT: movi v1.4s, #3 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: rshrn v0.4h, v0.4s, #13 ; CHECK-NEXT: ret entry: %b = add <4 x i32> %a, @@ -3561,15 +4346,61 @@ entry: } define <8 x i16> @signbits_vashr(<8 x i16> %a) { -; CHECK-LABEL: signbits_vashr: -; CHECK: // %bb.0: -; CHECK-NEXT: sshr.8h v0, v0, #8 -; CHECK-NEXT: sshr.8h v0, v0, #9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: signbits_vashr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #8 +; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: signbits_vashr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni v1.8h, #7 +; CHECK-GI-NEXT: mvni v2.8h, #8 +; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #7 +; CHECK-GI-NEXT: ret %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> ) %c = call <8 x i16> 
@llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> ) %d = ashr <8 x i16> %c, ret <8 x i16> %d } +define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) { +; CHECK-LABEL: lshr_trunc_v2i64_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: shrn v0.2s, v0.2d, #16 +; CHECK-NEXT: ret + %b = lshr <2 x i64> %a, + %c = trunc <2 x i64> %b to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) { +; CHECK-LABEL: ashr_trunc_v2i64_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: shrn v0.2s, v0.2d, #16 +; CHECK-NEXT: ret + %b = ashr <2 x i64> %a, + %c = trunc <2 x i64> %b to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) { +; CHECK-SD-LABEL: shl_trunc_v2i64_v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_trunc_v2i64_v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #16 +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: ret + %b = shl <2 x i64> %a, + %c = trunc <2 x i64> %b to <2 x i8> + ret <2 x i8> %c +} + declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/Generic/cgdata-merge-crash.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-crash.ll similarity index 100% rename from llvm/test/CodeGen/Generic/cgdata-merge-crash.ll rename to llvm/test/CodeGen/AArch64/cgdata-merge-crash.ll diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-local.ll similarity index 87% rename from llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll rename to llvm/test/CodeGen/AArch64/cgdata-merge-local.ll index 660ffe6109948..608fe29e17398 100644 --- a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-local.ll @@ -2,9 +2,9 @@ ; while parameterizing a difference in their global variables, g1 and g2. 
; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm, ; which are tail-called from thunks f1 and f2 respectively. -; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC). +; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF). -; RUN: opt -S --passes=global-merge-func %s | FileCheck %s +; RUN: opt -mtriple=arm64-apple-darwin -S --passes=global-merge-func %s | FileCheck %s ; A merging instance is created with additional parameter. ; CHECK: define internal i32 @f1.Tgm(i32 %0, ptr %1) @@ -38,8 +38,8 @@ ; CHECK-NEXT: %1 = tail call i32 @f2.Tgm(i32 %a, ptr @g2) ; CHECK-NEXT: ret i32 %1 -; RUN: llc -enable-global-merge-func=true < %s | FileCheck %s --check-prefix=MERGE -; RUN: llc -enable-global-merge-func=false < %s | FileCheck %s --check-prefix=NOMERGE +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true < %s | FileCheck %s --check-prefix=MERGE +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=false < %s | FileCheck %s --check-prefix=NOMERGE ; MERGE: _f1.Tgm ; MERGE: _f2.Tgm @@ -47,9 +47,6 @@ ; NOMERGE-NOT: _f1.Tgm ; NOMERGE-NOT: _f2.Tgm -target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -target triple = "arm64-unknown-ios12.0.0" - @g = external local_unnamed_addr global [0 x i32], align 4 @g1 = external global i32, align 4 @g2 = external global i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-no-params.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-no-params.ll new file mode 100644 index 0000000000000..10f0e10f11d66 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-no-params.ll @@ -0,0 +1,39 @@ +; This test verifies whether two identical functions, f1 and f2, can be merged +; locally using the global merge function. +; The functions, f1.Tgm and f2.Tgm, will be folded by the linker through +; Identical Code Folding (ICF). 
+; While identical functions can already be folded by the linker, creating this +; canonical form can be beneficial in downstream passes. This merging process +; can be controlled by the -global-merging-skip-no-params option. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s --check-prefix=MERGE +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=true < %s | FileCheck %s --implicit-check-not=".Tgm" + +; MERGE: _f1.Tgm +; MERGE: _f2.Tgm + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index d9aaae20afc69..d4d89a7c9c22e 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <4 x i8> @concat1(<2 x i8> 
%A, <2 x i8> %B) { ; CHECK-SD-LABEL: concat1: diff --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll index 43c6e01911462..75d55773b3681 100644 --- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll +++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <2 x i32> @and_extract_zext_idx0(<4 x i16> %vec) nounwind { ; CHECK-SD-LABEL: and_extract_zext_idx0: diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll index d18af3d5ae945..7705d8949ca1e 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll @@ -2,6 +2,13 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for insert_vscale_8_i16_zero +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_vscale_8_i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_vscale_16_i8_zero +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_vscale_16_i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_vscale_16_i8 +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for extract_vscale_16_i8_zero + define @insert_vscale_2_i64_zero( %vec, i64 %elt) { ; CHECK-SD-LABEL: insert_vscale_2_i64_zero: ; CHECK-SD: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 7056a4d28fed3..51aad4fe25d3b 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,164 +1,308 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-NO16,CHECK-SD-NO16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-FP16,CHECK-SD-FP16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-NO16,CHECK-GI-NO16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-FP16,CHECK-GI-FP16 ; fptoui define i32 @fcvtzs_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptosi float %fix to i32 ret i32 %cvt } define i32 @fcvtzs_f32_i32_32(float %flt) { -; CHECK-LABEL: 
fcvtzs_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = fptosi float %fix to i32 ret i32 %cvt } define i64 @fcvtzs_f32_i64_7(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptosi float %fix to i64 ret i64 %cvt } define i64 @fcvtzs_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = fptosi float %fix to i64 ret i64 %cvt } define i32 @fcvtzs_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i32_7: +; CHECK-GI: // 
%bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptosi double %fix to i32 ret i32 %cvt } define i32 @fcvtzs_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = fptosi double %fix to i32 ret i32 %cvt } define i64 @fcvtzs_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptosi double %fix to i64 ret i64 %cvt } define i64 @fcvtzs_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; 
CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = fptosi double %fix to i64 ret i64 %cvt } define i32 @fcvtzs_f16_i32_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptosi half %fix to i32 ret i32 %cvt } define i32 @fcvtzs_f16_i32_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; 
CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptosi half %fix to i32 ret i32 %cvt } define i64 @fcvtzs_f16_i64_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: 
fcvtzs_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptosi half %fix to i64 ret i64 %cvt } define i64 @fcvtzs_f16_i64_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; 
CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptosi half %fix to i64 ret i64 %cvt @@ -167,160 +311,302 @@ define i64 @fcvtzs_f16_i64_15(half %flt) { ; fptoui define i32 @fcvtzu_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptoui float %fix to i32 ret i32 %cvt } define i32 @fcvtzu_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; 
CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = fptoui float %fix to i32 ret i32 %cvt } define i64 @fcvtzu_f32_i64_7(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptoui float %fix to i64 ret i64 %cvt } define i64 @fcvtzu_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = fptoui float %fix to i64 ret i64 %cvt } define i32 @fcvtzu_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptoui double %fix to i32 ret i32 %cvt } define i32 @fcvtzu_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #32 -; CHECK-NEXT: ret +; 
CHECK-SD-LABEL: fcvtzu_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = fptoui double %fix to i32 ret i32 %cvt } define i64 @fcvtzu_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptoui double %fix to i64 ret i64 %cvt } define i64 @fcvtzu_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = fptoui double %fix to i64 ret i64 %cvt } define i32 @fcvtzu_f16_i32_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; 
CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptoui half %fix to i32 ret i32 %cvt } define i32 @fcvtzu_f16_i32_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; 
CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI21_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptoui half %fix to i32 ret i32 %cvt } define i64 @fcvtzu_f16_i64_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret 
+; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI22_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptoui half %fix to i64 ret i64 %cvt } define i64 @fcvtzu_f16_i64_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; 
CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI23_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptoui half %fix to i64 ret i64 %cvt @@ -329,160 +615,302 @@ define i64 @fcvtzu_f16_i64_15(half %flt) { ; sitofp define float @scvtf_f32_i32_7(i32 %int) { -; CHECK-LABEL: scvtf_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: scvtf s1, w0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @scvtf_f32_i32_32(i32 %int) { -; CHECK-LABEL: scvtf_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf s0, w0 +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to float %fix = fdiv float %cvt, 4294967296.0 ret float %fix } define float @scvtf_f32_i64_7(i64 %long) { -; CHECK-LABEL: scvtf_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, 
lsl #24 +; CHECK-GI-NEXT: scvtf s1, x0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @scvtf_f32_i64_64(i64 %long) { -; CHECK-LABEL: scvtf_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf s0, x0 +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to float %fix = fdiv float %cvt, 18446744073709551616.0 ret float %fix } define double @scvtf_f64_i32_7(i32 %int) { -; CHECK-LABEL: scvtf_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @scvtf_f64_i32_32(i32 %int) { -; CHECK-LABEL: scvtf_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to double %fix = fdiv double %cvt, 4294967296.0 ret double %fix } define double 
@scvtf_f64_i64_7(i64 %long) { -; CHECK-LABEL: scvtf_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @scvtf_f64_i64_64(i64 %long) { -; CHECK-LABEL: scvtf_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to double %fix = fdiv double %cvt, 18446744073709551616.0 ret double %fix } define half @scvtf_f16_i32_7(i32 %int) { -; CHECK-NO16-LABEL: scvtf_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, w0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; 
CHECK-SD-FP16-LABEL: scvtf_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, w0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i32 %int to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @scvtf_f16_i32_15(i32 %int) { -; CHECK-NO16-LABEL: scvtf_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, w0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, w0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov 
s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i32 %int to half %fix = fdiv half %cvt, 32768.0 ret half %fix } define half @scvtf_f16_i64_7(i64 %long) { -; CHECK-NO16-LABEL: scvtf_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, x0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, x0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, x0 +; 
CHECK-GI-FP16-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i64 %long to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @scvtf_f16_i64_15(i64 %long) { -; CHECK-NO16-LABEL: scvtf_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, x0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, x0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i64 %long to half %fix = fdiv half %cvt, 32768.0 ret half %fix @@ -491,160 +919,302 @@ define half @scvtf_f16_i64_15(i64 %long) { ; uitofp define 
float @ucvtf_f32_i32_7(i32 %int) { -; CHECK-LABEL: ucvtf_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: ucvtf s1, w0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @ucvtf_f32_i32_32(i32 %int) { -; CHECK-LABEL: ucvtf_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf s0, w0 +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to float %fix = fdiv float %cvt, 4294967296.0 ret float %fix } define float @ucvtf_f32_i64_7(i64 %long) { -; CHECK-LABEL: ucvtf_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: ucvtf s1, x0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @ucvtf_f32_i64_64(i64 %long) { -; CHECK-LABEL: ucvtf_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: 
ucvtf s0, x0 +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to float %fix = fdiv float %cvt, 18446744073709551616.0 ret float %fix } define double @ucvtf_f64_i32_7(i32 %int) { -; CHECK-LABEL: ucvtf_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @ucvtf_f64_i32_32(i32 %int) { -; CHECK-LABEL: ucvtf_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to double %fix = fdiv double %cvt, 4294967296.0 ret double %fix } define double @ucvtf_f64_i64_7(i64 %long) { -; CHECK-LABEL: ucvtf_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to double 
%fix = fdiv double %cvt, 128.0 ret double %fix } define double @ucvtf_f64_i64_64(i64 %long) { -; CHECK-LABEL: ucvtf_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to double %fix = fdiv double %cvt, 18446744073709551616.0 ret double %fix } define half @ucvtf_f16_i32_7(i32 %int) { -; CHECK-NO16-LABEL: ucvtf_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, w0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, w0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; 
CHECK-GI-FP16-LABEL: ucvtf_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI44_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i32 %int to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @ucvtf_f16_i32_15(i32 %int) { -; CHECK-NO16-LABEL: ucvtf_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, w0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, w0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI45_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i32 %int to half %fix = fdiv half %cvt, 
32768.0 ret half %fix } define half @ucvtf_f16_i64_7(i64 %long) { -; CHECK-NO16-LABEL: ucvtf_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, x0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, x0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI46_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i64 %long to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @ucvtf_f16_i64_15(i64 %long) { -; CHECK-NO16-LABEL: ucvtf_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; 
CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, x0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, x0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI47_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i64 %long to half %fix = fdiv half %cvt, 32768.0 ret half %fix @@ -661,150 +1231,285 @@ declare i32 @llvm.fptosi.sat.i32.f16(half) declare i64 @llvm.fptosi.sat.i64.f16(half) define i32 @fcvtzs_sat_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzs_sat_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float 
%flt, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) ret i32 %cvt } define i32 @fcvtzs_sat_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzs_sat_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) ret i32 %cvt } define i64 @fcvtzs_sat_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzs_sat_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = call i64 @llvm.fptosi.sat.i64.f32(float %fix) ret i64 %cvt } define i32 @fcvtzs_sat_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) ret i32 %cvt } define i32 
@fcvtzs_sat_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) ret i32 %cvt } define i64 @fcvtzs_sat_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) ret i64 %cvt } define i64 @fcvtzs_sat_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) ret i64 %cvt } define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { -; CHECK-NO16-LABEL: 
fcvtzs_sat_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI55_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) ret i32 %cvt } define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; 
CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI56_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) ret i32 %cvt } define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi 
v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI57_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) ret i64 %cvt } define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: 
fcvtzs_sat_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI58_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) ret i64 %cvt @@ -820,151 +1525,290 @@ declare i32 @llvm.fptoui.sat.i32.f16(half) declare i64 @llvm.fptoui.sat.i64.f16(half) define i32 @fcvtzu_sat_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzu_sat_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) ret i32 %cvt } define i32 @fcvtzu_sat_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzu_sat_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; 
CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) ret i32 %cvt } define i64 @fcvtzu_sat_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzu_sat_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = call i64 @llvm.fptoui.sat.i64.f32(float %fix) ret i64 %cvt } define i32 @fcvtzu_sat_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) ret i32 %cvt } define i32 @fcvtzu_sat_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: 
fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) ret i32 %cvt } define i64 @fcvtzu_sat_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) ret i64 %cvt } define i64 @fcvtzu_sat_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) ret i64 %cvt } define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; 
CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI66_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) ret i32 %cvt } define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; 
CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI67_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) ret i32 %cvt } define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // 
=0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI68_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) ret i64 %cvt } define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 
+; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI69_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) ret i64 %cvt } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} +; CHECK-FP16: {{.*}} +; CHECK-NO16: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll index bbfec8c7c3361..4ab5db450a7f3 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) { ; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16: diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll index 4cce06dce44c9..d323a7e677b5a 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll @@ -1,11 +1,85 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -; RUN: llc 
-mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that constrained fp intrinsics are correctly lowered. +; CHECK-GI: warning: Instruction selection used fallback path for add_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f16_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f16_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f16_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f16_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f16_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f16_i128 +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for sqrt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f16 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for floor_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ldexp_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
fcmps_oeq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_uge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ueq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f16_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f32_f16 ; Half-precision intrinsics @@ -760,6 +834,21 @@ define half @trunc_f16(half %x) #0 { ret half %val } +define half @ldexp_f16(half %x, i32 %y) #0 { +; CHECK-LABEL: ldexp_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %val = call half @llvm.experimental.constrained.ldexp.f16.i32(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + define i32 @fcmp_olt_f16(half %a, half %b) #0 { ; CHECK-NOFP16-LABEL: fcmp_olt_f16: ; CHECK-NOFP16: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll index 6147afba4e603..83e60c1089762 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -1,9 +1,86 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 %s -disable-strictnode-mutation -o - | FileCheck %s -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64 %s -disable-strictnode-mutation -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that constrained fp vector intrinsics are correctly lowered. 
+; CHECK-GI: warning: Instruction selection used fallback path for add_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v4i32_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v4i32_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v4i64_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v4i64_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v4f32_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v4f32_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v4f32_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v4f32_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v4f32 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for fcmp_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v2i32_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v2i32_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v2i64_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v2i64_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v2f64_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v2f64_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v2f64_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v2f64_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v2f64 +; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for roundeven_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v1i32_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v1i32_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v1i64_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v1i64_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v1f64_v1i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v1f64_v1i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v1f64_v1i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v1f64_v1i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v1f64 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for floor_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_v1f61 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_v1f61 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_v2f32_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_v2f64_v2f32 ; Single-precision intrinsics @@ -882,3 +959,7 @@ declare <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double>, <1 x d declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll index fd3a0c3207606..f2a14a9b73fa1 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -1,543 +1,1037 @@ -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that constrained fp intrinsics are correctly lowered. 
+; CHECK-GI: warning: Instruction selection used fallback path for add_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f32_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f32_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f32_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f32_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f32_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f32_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f32 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maximum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minimum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f32 +; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for trunc_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_uge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback 
path for fcmps_ueq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f64_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f64_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f64_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f64_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f64_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f64_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f64 +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for acos_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maximum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minimum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f64 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f64 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for fcmps_uge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ueq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f128_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f128_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f128_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f128_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f128_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f128_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f128 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f128 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_uge_f128 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for fcmps_ueq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f32_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f32_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f64_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f64_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f128_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f128_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
exp2_v1f64 + ; Single-precision intrinsics -; CHECK-LABEL: add_f32: -; CHECK: fadd s0, s0, s1 define float @add_f32(float %x, float %y) #0 { +; CHECK-LABEL: add_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sub_f32: -; CHECK: fsub s0, s0, s1 define float @sub_f32(float %x, float %y) #0 { +; CHECK-LABEL: sub_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: mul_f32: -; CHECK: fmul s0, s0, s1 define float @mul_f32(float %x, float %y) #0 { +; CHECK-LABEL: mul_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: div_f32: -; CHECK: fdiv s0, s0, s1 define float @div_f32(float %x, float %y) #0 { +; CHECK-LABEL: div_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: frem_f32: -; CHECK: bl fmodf define float @frem_f32(float %x, float %y) #0 { +; CHECK-LABEL: frem_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmodf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fma_f32: -; CHECK: fmadd s0, s0, s1, s2 define float @fma_f32(float %x, float %y, float %z) #0 { +; CHECK-LABEL: fma_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fptosi_i32_f32: -; CHECK: fcvtzs w0, s0 define i32 @fptosi_i32_f32(float %x) #0 { +; CHECK-LABEL: fptosi_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptoui_i32_f32: -; CHECK: fcvtzu w0, s0 define i32 @fptoui_i32_f32(float %x) #0 { +; CHECK-LABEL: fptoui_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptosi_i64_f32: -; CHECK: fcvtzs x0, s0 define i64 @fptosi_i64_f32(float %x) #0 { +; CHECK-LABEL: fptosi_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: fptoui_i64_f32: -; CHECK: fcvtzu x0, s0 define i64 @fptoui_i64_f32(float %x) #0 { +; CHECK-LABEL: fptoui_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } 
-; CHECK-LABEL: sitofp_f32_i32: -; CHECK: scvtf s0, w0 define float @sitofp_f32_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f32_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, w0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: uitofp_f32_i32: -; CHECK: ucvtf s0, w0 define float @uitofp_f32_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f32_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, w0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sitofp_f32_i64: -; CHECK: scvtf s0, x0 define float @sitofp_f32_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f32_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, x0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: uitofp_f32_i64: -; CHECK: ucvtf s0, x0 define float @uitofp_f32_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f32_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, x0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sitofp_f32_i128: -; CHECK: bl __floattisf define float @sitofp_f32_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f32_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floattisf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: uitofp_f32_i128: -; CHECK: bl __floatuntisf define float @uitofp_f32_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f32_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatuntisf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.uitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sqrt_f32: -; CHECK: fsqrt s0, s0 define float @sqrt_f32(float %x) #0 { +; CHECK-LABEL: sqrt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: powi_f32: -; CHECK: bl __powisf2 define float @powi_f32(float %x, i32 %y) #0 { +; CHECK-LABEL: powi_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __powisf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.powi.f32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sin_f32: -; CHECK: bl sinf define float @sin_f32(float %x) #0 { +; CHECK-LABEL: sin_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: cos_f32: -; CHECK: bl cosf define float @cos_f32(float %x) #0 { +; CHECK-LABEL: cos_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: tan_f32: -; CHECK: bl tanf define float @tan_f32(float %x) #0 { +; CHECK-LABEL: tan_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: asin_f32: -; CHECK: bl asinf define float @asin_f32(float %x) #0 { +; CHECK-LABEL: asin_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asinf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: acos_f32: -; CHECK: bl acosf define float @acos_f32(float %x) #0 { +; CHECK-LABEL: acos_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acosf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: atan_f32: -; CHECK: bl atanf define float @atan_f32(float %x) #0 { +; CHECK-LABEL: atan_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atanf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: atan2_f32: -; CHECK: bl atan2f define float @atan2_f32(float %x, float %y) #0 { +; CHECK-LABEL: atan2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sinh_f32: -; CHECK: bl sinhf define float @sinh_f32(float %x) #0 { +; CHECK-LABEL: sinh_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinhf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: cosh_f32: -; CHECK: bl coshf define float @cosh_f32(float %x) #0 { +; CHECK-LABEL: cosh_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl coshf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: tanh_f32: -; CHECK: bl tanhf define float @tanh_f32(float %x) #0 { +; CHECK-LABEL: tanh_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanhf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: pow_f32: -; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { +; CHECK-LABEL: pow_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl powf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: log_f32: -; CHECK: bl logf define float @log_f32(float %x) #0 { +; CHECK-LABEL: log_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl logf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: log10_f32: -; CHECK: bl log10f define float @log10_f32(float %x) #0 { +; CHECK-LABEL: log10_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: log2_f32: -; CHECK: bl log2f define float @log2_f32(float %x) #0 { +; CHECK-LABEL: log2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.log2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: exp_f32: -; CHECK: bl expf define float @exp_f32(float %x) #0 { +; CHECK-LABEL: exp_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl expf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: exp2_f32: -; CHECK: bl exp2f define float @exp2_f32(float %x) #0 { +; CHECK-LABEL: exp2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.exp2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: rint_f32: -; CHECK: frintx s0, s0 define float @rint_f32(float %x) #0 { +; CHECK-LABEL: rint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.rint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: nearbyint_f32: -; CHECK: frinti s0, s0 define float @nearbyint_f32(float %x) #0 { +; CHECK-LABEL: nearbyint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.nearbyint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: lrint_f32: -; CHECK: frintx [[REG:s[0-9]+]], s0 -; CHECK: fcvtzs w0, [[REG]] define i32 @lrint_f32(float %x) #0 { +; CHECK-LABEL: lrint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llrint_f32: -; CHECK: frintx [[REG:s[0-9]+]], s0 -; CHECK: fcvtzs x0, [[REG]] define i64 @llrint_f32(float %x) #0 { +; CHECK-LABEL: llrint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: maxnum_f32: -; CHECK: fmaxnm s0, s0, s1 define float @maxnum_f32(float %x, float %y) #0 { +; CHECK-LABEL: maxnum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: 
fmaxnm s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.maxnum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: minnum_f32: -; CHECK: fminnm s0, s0, s1 define float @minnum_f32(float %x, float %y) #0 { +; CHECK-LABEL: minnum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.minnum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: maximum_f32: -; CHECK: fmax s0, s0, s1 define float @maximum_f32(float %x, float %y) #0 { +; CHECK-LABEL: maximum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.maximum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: minimum_f32: -; CHECK: fmin s0, s0, s1 define float @minimum_f32(float %x, float %y) #0 { +; CHECK-LABEL: minimum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.minimum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: ceil_f32: -; CHECK: frintp s0, s0 define float @ceil_f32(float %x) #0 { +; CHECK-LABEL: ceil_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.ceil.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: floor_f32: -; CHECK: frintm s0, s0 define float @floor_f32(float %x) #0 { +; CHECK-LABEL: floor_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: lround_f32: -; CHECK: fcvtas w0, s0 define i32 @lround_f32(float %x) #0 { +; CHECK-LABEL: lround_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas w0, s0 +; CHECK-NEXT: ret %val = call i32 
@llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llround_f32: -; CHECK: fcvtas x0, s0 define i64 @llround_f32(float %x) #0 { +; CHECK-LABEL: llround_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: round_f32: -; CHECK: frinta s0, s0 define float @round_f32(float %x) #0 { +; CHECK-LABEL: round_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.round.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: roundeven_f32: -; CHECK: frintn s0, s0 define float @roundeven_f32(float %x) #0 { +; CHECK-LABEL: roundeven_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: trunc_f32: -; CHECK: frintz s0, s0 define float @trunc_f32(float %x) #0 { +; CHECK-LABEL: trunc_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.trunc.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fcmp_olt_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_olt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_olt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ole_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ole_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ole_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 
@llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ogt_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ogt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ogt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oge_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_oge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_oge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oeq_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_oeq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_oeq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_one_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_one_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_one_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ult_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ult_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ult_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call 
i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ule_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ule_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ule_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ugt_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ugt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ugt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_uge_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_uge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_uge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ueq_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ueq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ueq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_une_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_une_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_une_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = 
call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_olt_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_olt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_olt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ole_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ole_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ole_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ogt_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ogt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ogt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oge_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_oge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_oge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oeq_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_oeq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_oeq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call 
i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_one_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_one_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_one_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ult_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ult_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ult_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ule_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ule_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ule_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ugt_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ugt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ugt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_uge_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_uge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_uge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, 
pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ueq_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ueq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ueq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_une_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_une_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_une_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -546,538 +1040,792 @@ define i32 @fcmps_une_f32(float %a, float %b) #0 { ; Double-precision intrinsics -; CHECK-LABEL: add_f64: -; CHECK: fadd d0, d0, d1 define double @add_f64(double %x, double %y) #0 { +; CHECK-LABEL: add_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sub_f64: -; CHECK: fsub d0, d0, d1 define double @sub_f64(double %x, double %y) #0 { +; CHECK-LABEL: sub_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: mul_f64: -; CHECK: fmul d0, d0, d1 define double @mul_f64(double %x, double %y) #0 { +; CHECK-LABEL: mul_f64: +; CHECK: // %bb.0: +; 
CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: div_f64: -; CHECK: fdiv d0, d0, d1 define double @div_f64(double %x, double %y) #0 { +; CHECK-LABEL: div_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fdiv.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: frem_f64: -; CHECK: bl fmod define double @frem_f64(double %x, double %y) #0 { +; CHECK-LABEL: frem_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmod +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.frem.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fma_f64: -; CHECK: fmadd d0, d0, d1, d2 define double @fma_f64(double %x, double %y, double %z) #0 { +; CHECK-LABEL: fma_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fptosi_i32_f64: -; CHECK: fcvtzs w0, d0 define i32 @fptosi_i32_f64(double %x) #0 { +; CHECK-LABEL: fptosi_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptoui_i32_f64: -; CHECK: fcvtzu w0, d0 define i32 @fptoui_i32_f64(double %x) #0 { +; CHECK-LABEL: fptoui_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: 
ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptosi_i64_f64: -; CHECK: fcvtzs x0, d0 define i64 @fptosi_i64_f64(double %x) #0 { +; CHECK-LABEL: fptosi_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: fptoui_i64_f64: -; CHECK: fcvtzu x0, d0 define i64 @fptoui_i64_f64(double %x) #0 { +; CHECK-LABEL: fptoui_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: sitofp_f64_i32: -; CHECK: scvtf d0, w0 define double @sitofp_f64_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, w0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: uitofp_f64_i32: -; CHECK: ucvtf d0, w0 define double @uitofp_f64_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, w0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sitofp_f64_i64: -; CHECK: scvtf d0, x0 define double @sitofp_f64_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f64_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, x0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: uitofp_f64_i64: -; CHECK: ucvtf d0, x0 define double @uitofp_f64_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f64_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, x0 +; 
CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sitofp_f64_i128: -; CHECK: bl __floattidf define double @sitofp_f64_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f64_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floattidf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: uitofp_f64_i128: -; CHECK: bl __floatuntidf define double @uitofp_f64_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f64_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatuntidf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.uitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sqrt_f64: -; CHECK: fsqrt d0, d0 define double @sqrt_f64(double %x) #0 { +; CHECK-LABEL: sqrt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sqrt.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: powi_f64: -; CHECK: bl __powidf2 define double @powi_f64(double %x, i32 %y) #0 { +; CHECK-LABEL: powi_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __powidf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.powi.f64(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sin_f64: -; CHECK: bl sin define double @sin_f64(double %x) #0 { +; CHECK-LABEL: sin_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: cos_f64: -; CHECK: bl cos define double @cos_f64(double %x) #0 { +; CHECK-LABEL: cos_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.cos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: tan_f64: -; CHECK: bl tan define double @tan_f64(double %x) #0 { +; CHECK-LABEL: tan_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: asin_f64: -; CHECK: bl asin define double @asin_f64(double %x) #0 { +; CHECK-LABEL: asin_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.asin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: acos_f64: -; CHECK: bl acos define double @acos_f64(double %x) #0 { +; CHECK-LABEL: acos_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.acos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: atan_f64: -; CHECK: bl atan define double @atan_f64(double %x) #0 { +; CHECK-LABEL: atan_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.atan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: atan2_f64: -; CHECK: bl atan2 define double @atan2_f64(double %x, double %y) #0 { +; CHECK-LABEL: atan2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sinh_f64: -; CHECK: bl sinh define double @sinh_f64(double %x) #0 { +; CHECK-LABEL: sinh_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sinh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: cosh_f64: -; CHECK: bl cosh define double @cosh_f64(double %x) #0 { +; CHECK-LABEL: cosh_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.cosh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: tanh_f64: -; CHECK: bl tanh define double @tanh_f64(double %x) #0 { +; CHECK-LABEL: tanh_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.tanh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: pow_f64: -; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { +; CHECK-LABEL: pow_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl pow +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.pow.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: log_f64: -; CHECK: bl log define double @log_f64(double %x) #0 { +; CHECK-LABEL: log_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.log.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: log10_f64: -; CHECK: bl log10 define double @log10_f64(double %x) #0 { +; CHECK-LABEL: log10_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.log10.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: log2_f64: -; CHECK: bl log2 define double @log2_f64(double %x) #0 { +; CHECK-LABEL: log2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.log2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: exp_f64: -; CHECK: bl exp define double @exp_f64(double %x) #0 { +; CHECK-LABEL: exp_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.exp.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: exp2_f64: -; CHECK: bl exp2 define double @exp2_f64(double %x) #0 { +; CHECK-LABEL: exp2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.exp2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: rint_f64: -; CHECK: frintx d0, d0 define double @rint_f64(double %x) #0 { +; CHECK-LABEL: rint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.rint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: nearbyint_f64: -; CHECK: frinti d0, d0 define double @nearbyint_f64(double %x) #0 { +; CHECK-LABEL: nearbyint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: lrint_f64: -; CHECK: frintx [[REG:d[0-9]+]], d0 -; CHECK: fcvtzs w0, [[REG]] define i32 @lrint_f64(double %x) #0 { +; CHECK-LABEL: lrint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llrint_f64: -; CHECK: frintx 
[[REG:d[0-9]+]], d0 -; CHECK: fcvtzs x0, [[REG]] define i64 @llrint_f64(double %x) #0 { +; CHECK-LABEL: llrint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: maxnum_f64: -; CHECK: fmaxnm d0, d0, d1 define double @maxnum_f64(double %x, double %y) #0 { +; CHECK-LABEL: maxnum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.maxnum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: minnum_f64: -; CHECK: fminnm d0, d0, d1 define double @minnum_f64(double %x, double %y) #0 { +; CHECK-LABEL: minnum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.minnum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: maximum_f64: -; CHECK: fmax d0, d0, d1 define double @maximum_f64(double %x, double %y) #0 { +; CHECK-LABEL: maximum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.maximum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: minimum_f64: -; CHECK: fmin d0, d0, d1 define double @minimum_f64(double %x, double %y) #0 { +; CHECK-LABEL: minimum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.minimum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: ceil_f64: -; CHECK: frintp d0, d0 define double @ceil_f64(double %x) #0 { +; CHECK-LABEL: ceil_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.ceil.f64(double %x, metadata 
!"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: floor_f64: -; CHECK: frintm d0, d0 define double @floor_f64(double %x) #0 { +; CHECK-LABEL: floor_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.floor.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: lround_f64: -; CHECK: fcvtas w0, d0 define i32 @lround_f64(double %x) #0 { +; CHECK-LABEL: lround_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas w0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llround_f64: -; CHECK: fcvtas x0, d0 define i64 @llround_f64(double %x) #0 { +; CHECK-LABEL: llround_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: round_f64: -; CHECK: frinta d0, d0 define double @round_f64(double %x) #0 { +; CHECK-LABEL: round_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.round.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: roundeven_f64: -; CHECK: frintn d0, d0 define double @roundeven_f64(double %x) #0 { +; CHECK-LABEL: roundeven_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: trunc_f64: -; CHECK: frintz d0, d0 define double @trunc_f64(double %x) #0 { +; CHECK-LABEL: trunc_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.trunc.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fcmp_olt_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_olt_f64(double %a, 
double %b) #0 { +; CHECK-LABEL: fcmp_olt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ole_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ole_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ole_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ogt_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ogt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ogt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oge_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_oge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_oge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oeq_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_oeq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_oeq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_one_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_one_f64(double %a, double %b) #0 
{ +; CHECK-LABEL: fcmp_one_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ult_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ult_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ult_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ule_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ule_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ule_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ugt_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ugt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ugt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_uge_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_uge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_uge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ueq_f64: -; CHECK: fcmp d0, d1 define i32 
@fcmp_ueq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ueq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_une_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_une_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_une_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_olt_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_olt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_olt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ole_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ole_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ole_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ogt_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ogt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ogt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: 
fcmps_oge_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_oge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_oge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oeq_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_oeq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_oeq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_one_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_one_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_one_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ult_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ult_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ult_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ule_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ule_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ule_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = 
zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ugt_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ugt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ugt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_uge_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_uge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_uge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ueq_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ueq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ueq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_une_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_une_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_une_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -1086,515 +1834,1015 @@ define i32 @fcmps_une_f64(double %a, double %b) #0 { ; Long-double-precision intrinsics -; CHECK-LABEL: add_f128: -; CHECK: bl __addtf3 define fp128 @add_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: add_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __addtf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sub_f128: -; CHECK: bl __subtf3 define fp128 @sub_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: sub_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __subtf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: mul_f128: -; CHECK: bl __multf3 define fp128 @mul_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: mul_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __multf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: div_f128: -; CHECK: bl __divtf3 define fp128 @div_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: div_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __divtf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: frem_f128: -; CHECK: bl fmodl define fp128 @frem_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: frem_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmodl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fma_f128: -; CHECK: fmal define fp128 @fma_f128(fp128 %x, fp128 %y, fp128 %z) #0 { +; CHECK-LABEL: fma_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmal +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fptosi_i32_f128: -; CHECK: bl __fixtfsi define i32 @fptosi_i32_f128(fp128 %x) #0 { +; CHECK-LABEL: fptosi_i32_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptoui_i32_f128: -; CHECK: bl __fixunstfsi define i32 @fptoui_i32_f128(fp128 %x) #0 { +; CHECK-LABEL: fptoui_i32_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptosi_i64_f128: -; CHECK: bl __fixtfdi define i64 @fptosi_i64_f128(fp128 %x) #0 { +; CHECK-LABEL: fptosi_i64_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: fptoui_i64_f128: -; CHECK: bl __fixunstfdi define i64 @fptoui_i64_f128(fp128 %x) #0 { +; CHECK-LABEL: fptoui_i64_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: sitofp_f128_i32: -; CHECK: bl __floatsitf define fp128 @sitofp_f128_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f128_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatsitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: uitofp_f128_i32: -; CHECK: bl __floatunsitf define fp128 @uitofp_f128_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f128_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatunsitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sitofp_f128_i64: -; CHECK: bl __floatditf define fp128 @sitofp_f128_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f128_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatditf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: uitofp_f128_i64: -; CHECK: bl __floatunditf define fp128 @uitofp_f128_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f128_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatunditf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sitofp_f128_i128: -; CHECK: bl __floattitf define fp128 @sitofp_f128_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f128_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floattitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: uitofp_f128_i128: -; CHECK: bl __floatuntitf define fp128 @uitofp_f128_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f128_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatuntitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sqrt_f128: -; CHECK: bl sqrtl define fp128 @sqrt_f128(fp128 %x) #0 { +; CHECK-LABEL: sqrt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sqrtl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: powi_f128: -; CHECK: bl __powitf2 define fp128 @powi_f128(fp128 %x, i32 %y) #0 { +; CHECK-LABEL: powi_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __powitf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sin_f128: -; CHECK: bl sinl define fp128 @sin_f128(fp128 %x) #0 { +; CHECK-LABEL: sin_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: cos_f128: -; CHECK: bl cosl define fp128 @cos_f128(fp128 %x) #0 { +; CHECK-LABEL: cos_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: tan_f128: -; CHECK: bl tanl define fp128 @tan_f128(fp128 %x) #0 { +; CHECK-LABEL: tan_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: asin_f128: -; CHECK: bl asinl define fp128 @asin_f128(fp128 %x) #0 { +; CHECK-LABEL: asin_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asinl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.asin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: acos_f128: -; CHECK: bl acosl define fp128 @acos_f128(fp128 %x) #0 { +; CHECK-LABEL: acos_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acosl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.acos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: atan_f128: -; CHECK: bl atanl define fp128 @atan_f128(fp128 %x) #0 { +; CHECK-LABEL: atan_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atanl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.atan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: atan2_f128: -; CHECK: bl atan2l define fp128 @atan2_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: atan2_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sinh_f128: -; CHECK: bl sinhl define fp128 @sinh_f128(fp128 %x) #0 { +; CHECK-LABEL: sinh_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinhl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sinh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: cosh_f128: -; CHECK: bl coshl define fp128 @cosh_f128(fp128 %x) #0 { +; CHECK-LABEL: cosh_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl coshl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.cosh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: tanh_f128: -; CHECK: bl tanhl define fp128 @tanh_f128(fp128 %x) #0 { +; CHECK-LABEL: tanh_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanhl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.tanh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: pow_f128: -; CHECK: bl powl define fp128 @pow_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: pow_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl powl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: log_f128: -; CHECK: bl logl define fp128 @log_f128(fp128 %x) #0 { +; CHECK-LABEL: log_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl logl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: log10_f128: -; CHECK: bl log10l define fp128 @log10_f128(fp128 %x) #0 { +; CHECK-LABEL: log10_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: log2_f128: -; CHECK: bl log2l define fp128 @log2_f128(fp128 %x) #0 { +; CHECK-LABEL: log2_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: exp_f128: -; CHECK: bl expl define fp128 @exp_f128(fp128 %x) #0 { +; CHECK-LABEL: exp_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl expl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: exp2_f128: -; CHECK: bl exp2l define fp128 @exp2_f128(fp128 %x) #0 { +; CHECK-LABEL: exp2_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: rint_f128: -; CHECK: bl rintl define fp128 @rint_f128(fp128 %x) #0 { +; CHECK-LABEL: rint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl rintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: nearbyint_f128: -; CHECK: bl nearbyintl define fp128 @nearbyint_f128(fp128 %x) #0 { +; CHECK-LABEL: nearbyint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl nearbyintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: lrint_f128: -; CHECK: bl lrintl define i32 @lrint_f128(fp128 %x) #0 { +; CHECK-LABEL: lrint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl lrintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llrint_f128: -; CHECK: bl llrintl define i64 @llrint_f128(fp128 %x) #0 { +; CHECK-LABEL: llrint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: maxnum_f128: -; CHECK: bl fmaxl define fp128 @maxnum_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: maxnum_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmaxl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: minnum_f128: -; CHECK: bl fminl define fp128 @minnum_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: minnum_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fminl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: ceil_f128: -; CHECK: bl ceill define fp128 @ceil_f128(fp128 %x) #0 { +; CHECK-LABEL: ceil_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl ceill +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: floor_f128: -; CHECK: bl floorl define fp128 @floor_f128(fp128 %x) #0 { +; CHECK-LABEL: floor_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl floorl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: lround_f128: -; CHECK: bl lroundl define i32 @lround_f128(fp128 %x) #0 { +; CHECK-LABEL: lround_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl lroundl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llround_f128: -; CHECK: bl llroundl define i64 @llround_f128(fp128 %x) #0 { +; CHECK-LABEL: llround_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llroundl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: round_f128: -; CHECK: bl roundl define fp128 @round_f128(fp128 %x) #0 { +; CHECK-LABEL: round_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: trunc_f128: -; CHECK: bl truncl define fp128 @trunc_f128(fp128 %x) #0 { +; CHECK-LABEL: trunc_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl truncl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fcmp_olt_f128: -; CHECK: bl __lttf2 define i32 @fcmp_olt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_olt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ole_f128: -; CHECK: bl __letf2 define i32 @fcmp_ole_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ole_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ogt_f128: -; CHECK: bl __gttf2 define i32 @fcmp_ogt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ogt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oge_f128: -; CHECK: bl __getf2 define i32 @fcmp_oge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_oge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oeq_f128: -; CHECK: bl __eqtf2 define i32 @fcmp_oeq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_oeq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_one_f128: -; CHECK: bl __eqtf2 define i32 @fcmp_one_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_one_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ult_f128: -; CHECK: 
bl __getf2 define i32 @fcmp_ult_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ult_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ule_f128: -; CHECK: bl __gttf2 define i32 @fcmp_ule_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ule_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ugt_f128: -; CHECK: bl __letf2 define i32 @fcmp_ugt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ugt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_uge_f128: -; CHECK: bl __lttf2 define i32 @fcmp_uge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_uge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ueq_f128: -; CHECK: bl __eqtf2 define i32 @fcmp_ueq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ueq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_une_f128: -; CHECK: bl __netf2 define i32 @fcmp_une_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_une_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __netf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_olt_f128: -; CHECK: bl __lttf2 define i32 @fcmps_olt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_olt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ole_f128: -; CHECK: bl __letf2 define i32 @fcmps_ole_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ole_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ogt_f128: -; CHECK: bl __gttf2 define i32 @fcmps_ogt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ogt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oge_f128: -; CHECK: bl __getf2 define i32 @fcmps_oge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_oge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oeq_f128: -; CHECK: bl __eqtf2 define i32 @fcmps_oeq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_oeq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_one_f128: -; CHECK: bl __eqtf2 define i32 @fcmps_one_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_one_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ult_f128: -; CHECK: bl __getf2 define i32 @fcmps_ult_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ult_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ule_f128: -; CHECK: bl __gttf2 define i32 @fcmps_ule_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ule_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ugt_f128: -; CHECK: bl __letf2 define i32 @fcmps_ugt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ugt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_uge_f128: -; CHECK: bl __lttf2 define i32 @fcmps_uge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_uge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ueq_f128: -; CHECK: bl __eqtf2 define i32 @fcmps_ueq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ueq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_une_f128: -; CHECK: bl __netf2 define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_une_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __netf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -1603,156 +2851,280 @@ define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 { ; Intrinsics to convert between floating-point types -; CHECK-LABEL: fptrunc_f32_f64: -; CHECK: fcvt s0, d0 define float @fptrunc_f32_f64(double %x) #0 { +; CHECK-LABEL: fptrunc_f32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fptrunc_f32_f128: -; CHECK: bl __trunctfsf2 define float @fptrunc_f32_f128(fp128 %x) #0 { +; CHECK-LABEL: fptrunc_f32_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __trunctfsf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fptrunc_f64_f128: -; CHECK: bl __trunctfdf2 define double @fptrunc_f64_f128(fp128 %x) #0 { +; CHECK-LABEL: fptrunc_f64_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __trunctfdf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fpext_f64_f32: -; CHECK: fcvt d0, s0 define double @fpext_f64_f32(float %x) #0 { +; CHECK-LABEL: fpext_f64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt d0, s0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fpext_f128_f32: -; CHECK: bl __extendsftf2 define fp128 @fpext_f128_f32(float %x) #0 { +; CHECK-LABEL: fpext_f128_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __extendsftf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fpext_f128_f64: -; CHECK: bl __extenddftf2 define fp128 @fpext_f128_f64(double %x) #0 { +; CHECK-LABEL: fpext_f128_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __extenddftf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sin_v1f64: -; CHECK: bl sin define <1 x double> @sin_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: sin_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.sin.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: cos_v1f64: -; CHECK: bl cos define <1 x double> @cos_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: cos_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.cos.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: tan_v1f64: -; CHECK: bl tan define <1 x double> @tan_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: tan_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.tan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: asin_v1f64: -; CHECK: bl asin define <1 x double> @asin_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: asin_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.asin.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: acos_v1f64: -; CHECK: bl acos define <1 x double> @acos_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: acos_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.acos.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: atan_v1f64: -; CHECK: bl atan define <1 x double> @atan_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: atan_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.atan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: atan2_v1f64: -; CHECK: bl atan2 define <1 x double> @atan2_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: atan2_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.atan2.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: sinh_v1f64: -; CHECK: bl sinh define <1 x double> @sinh_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: sinh_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.sinh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: cosh_v1f64: -; CHECK: bl cosh define <1 x double> @cosh_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: cosh_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.cosh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: tanh_v1f64: -; CHECK: bl tanh define <1 x double> @tanh_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: tanh_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.tanh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: pow_v1f64: -; CHECK: bl pow define <1 x double> @pow_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: pow_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl pow +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.pow.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: log_v1f64: -; CHECK: bl log define <1 x double> @log_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: log_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.log.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: log2_v1f64: -; CHECK: bl log2 define <1 x double> @log2_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: log2_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.log2.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: log10_v1f64: -; CHECK: bl log10 define <1 x double> @log10_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: log10_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.log10.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: exp_v1f64: -; CHECK: bl exp define <1 x double> @exp_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: exp_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.exp.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: exp2_v1f64: -; CHECK: bl exp2 define <1 x double> @exp2_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: exp2_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.exp2.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } @@ -1918,3 +3290,7 @@ declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll index 17c87a5dae419..bfb5c67801e6c 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 ; ; 32-bit float to signed integer diff --git 
a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll index 3c19fca4a22ae..0dea7be5052d0 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 ; ; 32-bit float to unsigned integer diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index 20a6dd0899b40..3037a9552bc27 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i8 @llvm.fshl.i8(i8, i8, i8) declare i16 @llvm.fshl.i16(i16, i16, i16) diff --git a/llvm/test/CodeGen/AArch64/itofp-bf16.ll b/llvm/test/CodeGen/AArch64/itofp-bf16.ll index 978fe0b5ba3b3..58591b11c184f 100644 --- a/llvm/test/CodeGen/AArch64/itofp-bf16.ll +++ 
b/llvm/test/CodeGen/AArch64/itofp-bf16.ll @@ -4,6 +4,63 @@ ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; CHECK-GI: warning: Instruction selection used fallback path for stofp_i64_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i64_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i32_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i32_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i16_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i16_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i8_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i8_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i64_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i64_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i64_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i64_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i64_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i64_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i64_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i64_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i64_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i64_v16bf16 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i64_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i64_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i32_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i32_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i32_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i32_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i32_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i32_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i32_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i32_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i32_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i32_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i32_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i32_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i16_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i16_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i16_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i16_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i16_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i16_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i16_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
utofp_v8i16_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i16_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i16_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i16_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i16_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i8_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i8_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i8_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i8_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i8_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i8_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i8_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i8_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i8_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i8_v32bf16 + define bfloat @stofp_i64_bf16(i64 %a) { ; CHECK-LABEL: stofp_i64_bf16: ; CHECK: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/mingw-refptr.ll b/llvm/test/CodeGen/AArch64/mingw-refptr.ll index 306bee9f85c42..cc9fac0506ff5 100644 --- a/llvm/test/CodeGen/AArch64/mingw-refptr.ll +++ b/llvm/test/CodeGen/AArch64/mingw-refptr.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=aarch64-w64-mingw32 | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ -; RUN: 
-mtriple=aarch64-w64-mingw32 2>&1| FileCheck %s --check-prefixes=GISEL,FALLBACK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-w64-mingw32 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-w64-mingw32 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI @var = external local_unnamed_addr global i32, align 4 @dsolocalvar = external dso_local local_unnamed_addr global i32, align 4 @@ -10,10 +10,11 @@ define dso_local i32 @getVar() { ; CHECK-LABEL: getVar: -; CHECK: adrp x8, .refptr.var -; CHECK: ldr x8, [x8, :lo12:.refptr.var] -; CHECK: ldr w0, [x8] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .refptr.var +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.var] +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @var, align 4 ret i32 %0 @@ -21,9 +22,10 @@ entry: define dso_local i32 @getDsoLocalVar() { ; CHECK-LABEL: getDsoLocalVar: -; CHECK: adrp x8, dsolocalvar -; CHECK: ldr w0, [x8, :lo12:dsolocalvar] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, dsolocalvar +; CHECK-NEXT: ldr w0, [x8, :lo12:dsolocalvar] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @dsolocalvar, align 4 ret i32 %0 @@ -31,9 +33,10 @@ entry: define dso_local i32 @getLocalVar() { ; CHECK-LABEL: getLocalVar: -; CHECK: adrp x8, localvar -; CHECK: ldr w0, [x8, :lo12:localvar] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, localvar +; CHECK-NEXT: ldr w0, [x8, :lo12:localvar] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @localvar, align 4 ret i32 %0 @@ -41,9 +44,10 @@ entry: define dso_local i32 @getLocalCommon() { ; CHECK-LABEL: getLocalCommon: -; CHECK: adrp x8, localcommon -; CHECK: ldr w0, [x8, :lo12:localcommon] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, localcommon +; CHECK-NEXT: ldr w0, [x8, :lo12:localcommon] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @localcommon, align 
4 ret i32 %0 @@ -51,10 +55,11 @@ entry: define dso_local i32 @getExtVar() { ; CHECK-LABEL: getExtVar: -; CHECK: adrp x8, __imp_extvar -; CHECK: ldr x8, [x8, :lo12:__imp_extvar] -; CHECK: ldr w0, [x8] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, __imp_extvar +; CHECK-NEXT: ldr x8, [x8, :lo12:__imp_extvar] +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @extvar, align 4 ret i32 %0 @@ -62,7 +67,8 @@ entry: define dso_local void @callFunc() { ; CHECK-LABEL: callFunc: -; CHECK: b otherFunc +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b otherFunc entry: tail call void @otherFunc() ret void @@ -70,16 +76,40 @@ entry: declare dso_local void @otherFunc() -; FALLBACK-NOT: remark:{{.*}}sspFunc define dso_local void @sspFunc() #0 { ; CHECK-LABEL: sspFunc: -; CHECK: adrp x8, .refptr.__stack_chk_guard -; CHECK: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] -; CHECK: ldr x8, [x8] -; GISEL-LABEL: sspFunc: -; GISEL: adrp x8, .refptr.__stack_chk_guard -; GISEL: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] -; GISEL: ldr x8, [x8] +; CHECK: .seh_proc sspFunc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, .refptr.__stack_chk_guard +; CHECK-NEXT: add x0, sp, #7 +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: bl ptrUser +; CHECK-NEXT: adrp x8, .refptr.__stack_chk_guard +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] +; CHECK-NEXT: ldr x9, [sp, #8] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB6_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: 
.seh_stackalloc 32 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %entry +; CHECK-NEXT: bl __stack_chk_fail +; CHECK-NEXT: brk #0x1 +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc entry: %c = alloca i8, align 1 call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %c) @@ -102,3 +132,7 @@ attributes #0 = { sspstrong } ; CHECK: .globl .refptr.var ; CHECK: .refptr.var: ; CHECK: .xword var + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll index 32bc5c5e63b3e..5b9f438ed1d43 100644 --- a/llvm/test/CodeGen/AArch64/mulcmle.ll +++ b/llvm/test/CodeGen/AArch64/mulcmle.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 %s -o - -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <1 x i64> @v1i64(<1 x i64> %a) { ; CHECK-SD-LABEL: v1i64: diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll index def0f15790a9b..7218204ba844c 100644 --- a/llvm/test/CodeGen/AArch64/neon-perm.ll +++ b/llvm/test/CodeGen/AArch64/neon-perm.ll @@ -1,13 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0 -; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for test_vuzp2q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI %struct.int8x8x2_t = type { [2 x <8 x i8>] } %struct.int16x4x2_t = type { [2 x <4 x i16>] } diff --git a/llvm/test/CodeGen/AArch64/neon-vector-splat.ll b/llvm/test/CodeGen/AArch64/neon-vector-splat.ll index 489eaf179a1bd..d3846cab46f55 100644 --- a/llvm/test/CodeGen/AArch64/neon-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/neon-vector-splat.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for shuffle8 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <2 x i32> @shuffle(ptr %P) { ; CHECK-SD-LABEL: shuffle: @@ -116,10 +114,16 @@ define <2 x i64> @shuffle7(ptr %P) { } define <2 x ptr> @shuffle8(ptr %P) { -; CHECK-LABEL: shuffle8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1r { v0.2d }, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffle8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1r { v0.2d }, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffle8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: dup v0.2d, v0.d[0] +; 
CHECK-GI-NEXT: ret %lv2ptr = load <2 x ptr>, ptr %P %sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer ret <2 x ptr> %sv2ptr diff --git a/llvm/test/CodeGen/AArch64/overflow.ll b/llvm/test/CodeGen/AArch64/overflow.ll index 977141f2b84f4..489d46f8b0e72 100644 --- a/llvm/test/CodeGen/AArch64/overflow.ll +++ b/llvm/test/CodeGen/AArch64/overflow.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,SDAG -; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,GISEL - +; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -global-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI define zeroext i1 @saddo1.i32.unused(i32 %v1, i32 %v2, ptr %res) { ; CHECK-LABEL: saddo1.i32.unused: @@ -105,19 +104,19 @@ entry: ret i1 %obit } define zeroext i1 @saddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) { -; SDAG-LABEL: saddo.add.i32: -; SDAG: // %bb.0: // %entry -; SDAG-NEXT: add w8, w4, #100 -; SDAG-NEXT: subs w8, w8, #100 -; SDAG-NEXT: cset w0, vs -; SDAG-NEXT: str w8, [x5] -; SDAG-NEXT: ret +; CHECK-SD-LABEL: saddo.add.i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: add w8, w4, #100 +; CHECK-SD-NEXT: subs w8, w8, #100 +; CHECK-SD-NEXT: cset w0, vs +; CHECK-SD-NEXT: str w8, [x5] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: saddo.add.i32: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: str w4, [x5] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: saddo.add.i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: str w4, [x5] +; CHECK-GI-NEXT: ret entry: %lhs = add nsw i32 %v5, 100 %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %lhs, i32 -100) @@ -128,20 +127,20 
@@ entry: } define zeroext i1 @uaddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) { -; SDAG-LABEL: uaddo.add.i32: -; SDAG: // %bb.0: // %entry -; SDAG-NEXT: add w8, w4, #5 -; SDAG-NEXT: adds w8, w8, #5 -; SDAG-NEXT: cset w0, hs -; SDAG-NEXT: str w8, [x5] -; SDAG-NEXT: ret +; CHECK-SD-LABEL: uaddo.add.i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: add w8, w4, #5 +; CHECK-SD-NEXT: adds w8, w8, #5 +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: str w8, [x5] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: uaddo.add.i32: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adds w8, w4, #10 -; GISEL-NEXT: cset w0, hs -; GISEL-NEXT: str w8, [x5] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: uaddo.add.i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adds w8, w4, #10 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: str w8, [x5] +; CHECK-GI-NEXT: ret entry: %lhs = add nuw i32 %v5, 5 %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %lhs, i32 5) diff --git a/llvm/test/CodeGen/AArch64/phi.ll b/llvm/test/CodeGen/AArch64/phi.ll index eeafbaffbcc69..55942d0e421bb 100644 --- a/llvm/test/CodeGen/AArch64/phi.ll +++ b/llvm/test/CodeGen/AArch64/phi.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64 -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i8 @ti8(i1 %c, ptr %p, i8 %a, i8 %b) { ; CHECK-SD-LABEL: ti8: diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index cb52c17e2531c..d07fcbc29806f 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll index f6fb4dd5e4b41..4a0e49518517b 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 29318bd28c45d..531562d3aa678 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -494,21 +497,45 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind 
{ } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x4 -; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, vs -; CHECK-NEXT: csel x1, x11, x9, vs -; CHECK-NEXT: adds x8, x2, x6 -; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x2, x10, x8, vs -; CHECK-NEXT: csel x3, x11, x9, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x4 +; CHECK-SD-NEXT: adcs x9, x1, x5 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x10, x8, vs +; CHECK-SD-NEXT: csel x1, x11, x9, vs +; CHECK-SD-NEXT: adds x8, x2, x6 +; CHECK-SD-NEXT: adcs x9, x3, x7 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x2, x10, x8, vs +; CHECK-SD-NEXT: csel x3, x11, x9, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds x9, x0, x4 +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: mov x13, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: adcs x10, x1, x5 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x14, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x0, x11, x9, ne +; CHECK-GI-NEXT: csel x1, x14, x10, ne +; CHECK-GI-NEXT: adds x9, x2, x6 +; CHECK-GI-NEXT: adcs x10, x3, x7 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x8, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x2, x11, x9, ne +; CHECK-GI-NEXT: csel x3, x8, x10, ne +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff 
--git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll index 3604db33d5c4b..53fbb351954fc 100644 --- a/llvm/test/CodeGen/AArch64/sext.ll +++ b/llvm/test/CodeGen/AArch64/sext.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i16 @sext_i8_to_i16(i8 %a) { ; CHECK-LABEL: sext_i8_to_i16: diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 69d3174581e3e..0f5b240e387ed 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -1,11 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2p0_zeroes -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0_zeroes +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; ===== Legal Vector Types ===== @@ -392,13 +387,49 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %c } +define <3 x ptr> 
@shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) { +; CHECK-SD-LABEL: shufflevector_v3p0: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d2, d5 +; CHECK-SD-NEXT: fmov d0, d1 +; CHECK-SD-NEXT: fmov d1, d3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v3p0: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d3 +; CHECK-GI-NEXT: mov v0.d[0], x8 +; CHECK-GI-NEXT: mov v2.d[0], x9 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: fmov x9, d4 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov v2.d[1], x9 +; CHECK-GI-NEXT: fmov x8, d5 +; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v2.16b, #8 +; CHECK-GI-NEXT: fmov x10, d1 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: fmov d1, d2 +; CHECK-GI-NEXT: fmov d2, x10 +; CHECK-GI-NEXT: ret + %c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> + ret <3 x ptr> %c +} + define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) { -; CHECK-LABEL: shufflevector_v4p0: -; CHECK: // %bb.0: -; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d -; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v4p0: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d +; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v4p0: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d +; CHECK-GI-NEXT: ret %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> ret <4 x ptr> %c } @@ -549,13 +580,13 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s0, w0 ; CHECK-GI-NEXT: fmov s1, w3 -; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: adrp x8, .LCPI35_0 ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v1.b[1], w4 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: mov v1.b[2], w5 ; CHECK-GI-NEXT: mov 
v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: umov w0, v0.b[0] ; CHECK-GI-NEXT: umov w1, v0.b[1] @@ -570,9 +601,9 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: adrp x8, .LCPI35_0 +; CHECK-SD-NEXT: adrp x8, .LCPI36_0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] +; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-SD-NEXT: ret ; @@ -580,9 +611,9 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: adrp x8, .LCPI36_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -601,9 +632,9 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: adrp x8, .LCPI36_0 +; CHECK-GI-NEXT: adrp x8, .LCPI37_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI37_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -614,18 +645,18 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { ; CHECK-SD-LABEL: 
shufflevector_v7i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI37_0 +; CHECK-SD-NEXT: adrp x8, .LCPI38_0 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v7i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI37_0 +; CHECK-GI-NEXT: adrp x8, .LCPI38_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret @@ -642,9 +673,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { ; ; CHECK-GI-LABEL: shufflevector_v3i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI38_0 +; CHECK-GI-NEXT: adrp x8, .LCPI39_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI39_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index cf201d628b7e1..23550d3c41cc7 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck 
%s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll index cabd580e20d50..f08629c15f26c 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 30e2a70ace072..be4a5843e8215 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -497,21 +500,45 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x4 -; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, vs -; CHECK-NEXT: csel x1, 
x11, x9, vs -; CHECK-NEXT: subs x8, x2, x6 -; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x2, x10, x8, vs -; CHECK-NEXT: csel x3, x11, x9, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x4 +; CHECK-SD-NEXT: sbcs x9, x1, x5 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x10, x8, vs +; CHECK-SD-NEXT: csel x1, x11, x9, vs +; CHECK-SD-NEXT: subs x8, x2, x6 +; CHECK-SD-NEXT: sbcs x9, x3, x7 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x2, x10, x8, vs +; CHECK-SD-NEXT: csel x3, x11, x9, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs x9, x0, x4 +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: mov x13, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: sbcs x10, x1, x5 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x14, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x0, x11, x9, ne +; CHECK-GI-NEXT: csel x1, x14, x10, ne +; CHECK-GI-NEXT: subs x9, x2, x6 +; CHECK-GI-NEXT: sbcs x10, x3, x7 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x8, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x2, x11, x9, ne +; CHECK-GI-NEXT: csel x3, x8, x10, ne +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll index ccf46e8fce2e1..e9d22c7be52ef 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: 
llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.uadd.sat.i4(i4, i4) declare i8 @llvm.uadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll index d29564029544c..5c81e3f20277a 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.uadd.sat.i4(i4, i4) declare i8 @llvm.uadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index badd31c1c561c..924bd3981779e 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -488,17 +491,33 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) 
nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x4 -; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: csinv x0, x8, xzr, lo -; CHECK-NEXT: csinv x1, x9, xzr, lo -; CHECK-NEXT: adds x8, x2, x6 -; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: csinv x2, x8, xzr, lo -; CHECK-NEXT: csinv x3, x9, xzr, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x4 +; CHECK-SD-NEXT: adcs x9, x1, x5 +; CHECK-SD-NEXT: csinv x0, x8, xzr, lo +; CHECK-SD-NEXT: csinv x1, x9, xzr, lo +; CHECK-SD-NEXT: adds x8, x2, x6 +; CHECK-SD-NEXT: adcs x9, x3, x7 +; CHECK-SD-NEXT: csinv x2, x8, xzr, lo +; CHECK-SD-NEXT: csinv x3, x9, xzr, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds x8, x0, x4 +; CHECK-GI-NEXT: adcs x9, x1, x5 +; CHECK-GI-NEXT: cset w10, hs +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csinv x0, x8, xzr, eq +; CHECK-GI-NEXT: csinv x1, x9, xzr, eq +; CHECK-GI-NEXT: adds x8, x2, x6 +; CHECK-GI-NEXT: adcs x9, x3, x7 +; CHECK-GI-NEXT: cset w10, hs +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csinv x2, x8, xzr, eq +; CHECK-GI-NEXT: csinv x3, x9, xzr, eq +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll index 160e7e6607cdc..54d7fc5a63b11 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.usub.sat.i4(i4, i4) declare i8 @llvm.usub.sat.i8(i8, i8) diff 
--git a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll index a9932216dbe34..2793aeb163c94 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.usub.sat.i4(i4, i4) declare i8 @llvm.usub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 45418b5c648fa..a623eb554cac7 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -486,17 +489,33 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x4 -; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: csel x0, xzr, x8, lo -; CHECK-NEXT: csel x1, xzr, x9, lo -; CHECK-NEXT: subs x8, x2, x6 -; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: csel x2, xzr, x8, lo -; CHECK-NEXT: csel x3, xzr, x9, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; 
CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x4 +; CHECK-SD-NEXT: sbcs x9, x1, x5 +; CHECK-SD-NEXT: csel x0, xzr, x8, lo +; CHECK-SD-NEXT: csel x1, xzr, x9, lo +; CHECK-SD-NEXT: subs x8, x2, x6 +; CHECK-SD-NEXT: sbcs x9, x3, x7 +; CHECK-SD-NEXT: csel x2, xzr, x8, lo +; CHECK-SD-NEXT: csel x3, xzr, x9, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs x8, x0, x4 +; CHECK-GI-NEXT: sbcs x9, x1, x5 +; CHECK-GI-NEXT: cset w10, lo +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, xzr, x8, ne +; CHECK-GI-NEXT: csel x1, xzr, x9, ne +; CHECK-GI-NEXT: subs x8, x2, x6 +; CHECK-GI-NEXT: sbcs x9, x3, x7 +; CHECK-GI-NEXT: cset w10, lo +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x2, xzr, x8, ne +; CHECK-GI-NEXT: csel x3, xzr, x9, ne +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index d71aed2d17506..809a6d6556a7b 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a) declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a) diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll new file mode 100644 index 0000000000000..cce0fb7e003c5 --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll @@ -0,0 +1,2719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s + +define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 2.000000e+00, float 1.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} 
+ +define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 5.000000e-01, float 1.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f32_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: 
fmul_select_v2f32_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f32_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f32_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x float> , <2 x float> + %ldexp = fmul <2 x float> %x, %y + ret <2 x float> %ldexp +} + +define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f32_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 
+; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f32_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f32_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f32_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x float> , <2 x float> + %ldexp = fmul <2 x float> %x, %y + ret <2 x float> %ldexp +} + +define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test5: +; GFX9: ; %bb.0: +; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float -2.000000e+00, float -1.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: 
v_mov_b32_e32 v3, 0xc0400000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float -3.000000e+00, float 8.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0x6d800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x5d000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x6d800000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x5d000000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x5d000000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x6d800000, 
v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x5d000000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x6d800000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0x43A0000000000000, float 0x45B0000000000000 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x41800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x41800000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 1.600000e+01, float -8.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0.000000e+00, float 2.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float 
@fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test10: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test10: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test10: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test10: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float -0.000000e+00, float 0.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xe6800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xdb800000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xe6800000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xe6800000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xe6800000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0x61800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x27800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: 
fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x61800000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x27800000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x27800000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x27800000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000 
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 2.000000e+00, double 1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x3fe00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3fe00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3fe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3fe00000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 5.000000e-01, double 1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e64 
v10, v11, 2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: v_mov_b32_e32 v8, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mov_b32_e32 v10, v8 +; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> , <2 x double> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; 
GFX7-NEXT: v_mov_b32_e32 v12, 0x3fe00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX9-NEXT: v_mov_b32_e32 v12, 0x3fe00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0x3ff00000, v9, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_dual_mov_b32 v10, v8 
:: v_dual_cndmask_b32 v11, 0x3ff00000, v9 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> , <2 x double> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0xbfe00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0xbfe00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0xbfe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0xbfe00000 :: 
v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -5.000000e-01, double -1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 
v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -2.000000e+00, double -1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test7: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test7: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 2.000000e+00, double -1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double 
@fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0xc0100000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0xc0100000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0xc0100000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0xc0100000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -4.000000e+00, double -3.200000e+01 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x 
i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v11, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v11, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: v_mov_b32_e32 v8, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mov_b32_e32 v10, v8 +; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> , <2 x double> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test10: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_mov_b32_e32 v9, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v10, 0x3fe00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX7-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test10: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v10, 0x3fe00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: 
fmul_select_v2f64_test10: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0xbff00000, v9, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test10: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0xbff00000, v9 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> , <2 x double> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test11: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_bfrev_b32_e32 v4, 1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test11: +; GFX9: ; %bb.0: +; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v4, 1 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test11: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test11: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -2.000000e+00, double -0.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test12: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 31, v2 +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test12: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 31, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test12: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v2, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 31, v3 +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test12: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3 +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0.000000e+00, double -0.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test13: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v5, 0x40300000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_mov_b32_e32 v4, 0 +; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test13: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test13: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test13: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0.000000e+00, double 1.600000e+01 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3e400000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3e400000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 
vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3e400000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0x45b00000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3e400000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3de00000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3de00000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3de00000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3d500000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3de00000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0x3D50000000000000, double 0x3DE0000000000000 + %ldexp = fmul double %x, %y + ret double %ldexp +} + + +define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: 
v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: 
v_mov_b32_e32 v3, 0x3800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f16_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f16_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f16_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; 
GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f16_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x half> , <2 x half> + %ldexp = fmul <2 x half> %x, %y + ret <2 x half> %ldexp +} + +define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f16_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f16_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: 
v_pack_b32_f16 v1, v1, v2 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f16_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f16_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x half> , <2 x half> + %ldexp = fmul <2 x half> %x, %y + ret <2 x half> %ldexp +} + +define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 2.000000e+00, half 8.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; 
GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half -8.000000e+00, half 3.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test7: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: 
v_mov_b32_e32 v3, 0x4800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test7: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 8.000000e+00, half -4.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half -0.000000e+00, half 0.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, 
vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half -1.600000e+01, half -3.200000e+01 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x45000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3a000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x6800 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x1000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x1000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x6800, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x1000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x6800, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; 
GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 0xH1000, half 0xH6800 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x38800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x43000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x400 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x5800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x5800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x400, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x5800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x400, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, 
%bool.arg2 + %y = select i1 %bool, half 0xH5800, half 0xH0400 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; 
GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3f00 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 
v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3f00 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2bf16_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2bf16_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4 +; GFX9-NEXT: v_or_b32_e32 
v4, 0x400000, v1 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x7060302 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2bf16_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2bf16_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1100-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 +; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x bfloat> , <2 x bfloat> + %ldexp = fmul <2 x bfloat> %x, %y + ret <2 x bfloat> %ldexp +} + +define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2bf16_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: 
v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2bf16_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x3f00 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4 +; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x7060302 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2bf16_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3f00 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 
16, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2bf16_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3f00 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 +; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-NEXT: 
v_add3_u32 v3, v3, v0, 0x7fff +; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x bfloat> , <2 x bfloat> + %ldexp = fmul <2 x bfloat> %x, %y + ret <2 x bfloat> %ldexp +} + +define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4040 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc100 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc100 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc080 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4100 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 
v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test7: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test7: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, 
vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 31, v1 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 15 +; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1030-NEXT: v_lshlrev_b16 v1, 15, v1 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_lshlrev_b16 v1, 15, v1 +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: 
v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc200 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc180 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc180 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo +; 
GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000 +; GFX7-NEXT: v_bfrev_b32_e32 v4, 7 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffdb80 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffe000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: 
v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffe000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: 
s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test11_sel_log2val_neg22_pos25(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_bfrev_b32_e32 v3, 50 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x34800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4c00 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3480 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3480 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll new file mode 100644 index 0000000000000..ab380dbef107a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-SDAG +; 
RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-GISEL + +declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg) +declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32) + +define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen sc0 +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: ; return to shader part epilog + %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + store <2 x bfloat> %orig, ptr null + ret float 1.0 +} + +define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: 
struct_buffer_atomic_add_v2bf16_noret: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen +; GFX950-GISEL-NEXT: s_endpgm + %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen +; GFX950-GISEL-NEXT: s_endpgm + %ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: 
raw_buffer_atomic_add_v2bf16_ret: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0 +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: ; return to shader part epilog + %orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + store <2 x bfloat> %orig, ptr null + ret float 1.0 +} diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir new file mode 100644 index 0000000000000..7583431675095 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir @@ -0,0 +1,257 @@ +# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane16_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane16_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane32_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane32_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane32_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane32_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $exec + $vgpr4 = V_MOV_B32_e32 0, implicit $exec + $vgpr5 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $exec + $vgpr4 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: valu_write_vdst_read_permlane16_swap_0 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_PERMLANE +name: valu_write_vdst_read_permlane16_swap_0 +body: | + bb.0: + liveins: $vgpr1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: valu_write_vdst_read_permlane16_swap_1 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_PERMLANE +name: valu_write_vdst_read_permlane16_swap_1 +body: | + bb.0: + liveins: $vgpr0 + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: valu_write_vdst_read_permlane32_swap_0 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_PERMLANE +name: valu_write_vdst_read_permlane32_swap_0 +body: | + bb.0: + liveins: $vgpr1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: valu_write_vdst_read_permlane32_swap_1 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_PERMLANE +name: valu_write_vdst_read_permlane32_swap_1 +body: | + bb.0: + liveins: $vgpr0 + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# No hazard, write of other register +# GCN-LABEL: name: valu_write_vdst_read_permlane16_swap_0_otherreg +# GCN: V_MOV_B32 +# GCN-NEXT: V_PERMLANE +name: valu_write_vdst_read_permlane16_swap_0_otherreg +body: | + bb.0: + liveins: $vgpr1 + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# Both permlane hazards at once. +# GCN-LABEL: name: valu_writes_vdst__vcmpx_write_exec__permlane32_swap +# GCN: V_MOV_B32 +# GCN: V_CMPX_EQ_I32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: valu_writes_vdst__vcmpx_write_exec__permlane32_swap +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $vgpr3 + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + $exec = V_CMPX_EQ_I32_e64 $vgpr2, $vgpr3, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_write_exec__valu_writes_vdst___permlane32_swap +# GCN: V_CMPX_EQ_I32 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_PERMLANE +name: vcmpx_write_exec__valu_writes_vdst___permlane32_swap +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $vgpr3 + $exec = V_CMPX_EQ_I32_e64 $vgpr2, $vgpr3, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: vcmpx_write_exec__valu_writes_vdstx2___permlane32_swap +# GCN: V_CMPX_EQ_I32 +# GCN: V_MOV_B32 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_PERMLANE +name: vcmpx_write_exec__valu_writes_vdstx2___permlane32_swap +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $vgpr3 + $exec = V_CMPX_EQ_I32_e64 $vgpr2, $vgpr3, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $exec + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.gfx950.ll new file mode 100644 index 0000000000000..c407616556b5a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.gfx950.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX950-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX950-GISEL %s + +declare <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.bf6.f32(<16 x float> %src0, <16 x float> %src1, float %scale) +declare <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.fp6.f32(<16 x float> %src0, <16 x float> %src1, float %scale) + +define amdgpu_ps void @test_scalef32_pk32_fp6_f32_vv(<16 x float> %src, float %scale, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f32_vv: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v18 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v17 +; GFX950-SDAG-NEXT: v_cvt_scalef32_2xpk16_fp6_f32 v[0:5], v[0:15], v[0:15], v16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[18:19], v[4:5], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off +; GFX950-SDAG-NEXT: s_endpgm +; +; 
GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f32_vv: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v17 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v18 +; GFX950-GISEL-NEXT: v_cvt_scalef32_2xpk16_fp6_f32 v[0:5], v[0:15], v[0:15], v16 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[20:21], v[0:3], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[20:21], v[4:5], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.fp6.f32(<16 x float> %src, <16 x float> %src, float %scale) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_fp6_f32_sl(<16 x float> inreg %src, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f32_sl: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-SDAG-NEXT: s_mov_b32 s16, 0x42c80000 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-SDAG-NEXT: v_cvt_scalef32_2xpk16_fp6_f32 v[2:7], v[2:17], v[2:17], s16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f32_sl: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 
s[0:1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, 0x42c80000 +; GFX950-GISEL-NEXT: v_cvt_scalef32_2xpk16_fp6_f32 v[2:7], v[2:17], v[2:17], v18 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.fp6.f32(<16 x float> %src, <16 x float> %src, float 100.0) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_bf6_f32_vv(<16 x float> %src, float %scale, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f32_vv: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v18 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v17 +; GFX950-SDAG-NEXT: v_cvt_scalef32_2xpk16_bf6_f32 v[0:5], v[0:15], v[0:15], v16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[18:19], v[4:5], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f32_vv: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v17 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v18 +; GFX950-GISEL-NEXT: v_cvt_scalef32_2xpk16_bf6_f32 v[0:5], v[0:15], v[0:15], v16 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[20:21], v[0:3], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[20:21], v[4:5], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.bf6.f32(<16 x float> %src, <16 x float> %src, float %scale) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_bf6_f32_sl(<16 x float> inreg %src, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f32_sl: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-SDAG-NEXT: s_mov_b32 s16, 0x42c80000 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; 
GFX950-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-SDAG-NEXT: v_cvt_scalef32_2xpk16_bf6_f32 v[2:7], v[2:17], v[2:17], s16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f32_sl: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, 0x42c80000 +; GFX950-GISEL-NEXT: v_cvt_scalef32_2xpk16_bf6_f32 v[2:7], v[2:17], v[2:17], v18 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.2xpk16.bf6.f32(<16 x float> %src, <16 x float> %src, float 100.0) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.ll new file mode 100644 index 0000000000000..4153bc8f43563 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.ll @@ -0,0 +1,474 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-GISEL %s + +declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float %scale) +declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float %scale) +declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float %scale) +declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float %scale) + +define amdgpu_ps void @test_scalef32_pk32_bf6_bf16_vv(<32 x bfloat> %src, float %scale, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_bf16_vv: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v18 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v17 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf6_bf16 v[0:5], v[0:15], v16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[18:19], v[4:5], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_bf16_vv: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v17 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v18 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v17, 16, v0 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v18, 16, v1 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v19, 16, v2 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v22, 16, v3 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v23, 16, v4 
+; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v24, 16, v5 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v25, 16, v6 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v26, 16, v7 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v27, 16, v8 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v28, 16, v9 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v29, 16, v10 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v30, 16, v11 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v31, 16, v12 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v32, 16, v13 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v33, 16, v14 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v34, 16, v15 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v0, v17 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v1, v18 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v2, v19 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v3, v22 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v4, v23 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v5, v24 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v6, v25 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v7, v26 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v8, v27 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v9, v28 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v10, v29 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v11, v30 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v12, v31 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v13, v32 dst_sel:WORD_1 
dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v14, v33 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v15, v34 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_bf6_bf16 v[0:5], v[0:15], v16 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[20:21], v[0:3], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[20:21], v[4:5], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float %scale) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_bf6_bf16_sl(<32 x bfloat> inreg %src, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_bf16_sl: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, s2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, s3 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, s4 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, s5 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v8, s6 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v9, s7 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v10, s8 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v11, s9 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v12, s10 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v13, s11 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v14, s12 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v15, s13 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, s14 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, s15 +; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf6_bf16 v[2:7], v[2:17], s0 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_bf16_sl: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_lshr_b32 s16, s0, 16 +; GFX950-GISEL-NEXT: 
s_lshr_b32 s17, s1, 16 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s16, 16 +; GFX950-GISEL-NEXT: s_and_b32 s0, s0, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s18, s2, 16 +; GFX950-GISEL-NEXT: s_or_b32 s0, s16, s0 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s17, 16 +; GFX950-GISEL-NEXT: s_and_b32 s1, s1, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s19, s3, 16 +; GFX950-GISEL-NEXT: s_or_b32 s1, s16, s1 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s18, 16 +; GFX950-GISEL-NEXT: s_and_b32 s2, s2, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s20, s4, 16 +; GFX950-GISEL-NEXT: s_or_b32 s2, s16, s2 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s19, 16 +; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s21, s5, 16 +; GFX950-GISEL-NEXT: s_or_b32 s3, s16, s3 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s20, 16 +; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s22, s6, 16 +; GFX950-GISEL-NEXT: s_or_b32 s4, s16, s4 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s21, 16 +; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s23, s7, 16 +; GFX950-GISEL-NEXT: s_or_b32 s5, s16, s5 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s22, 16 +; GFX950-GISEL-NEXT: s_and_b32 s6, s6, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s24, s8, 16 +; GFX950-GISEL-NEXT: s_or_b32 s6, s16, s6 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s23, 16 +; GFX950-GISEL-NEXT: s_and_b32 s7, s7, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s25, s9, 16 +; GFX950-GISEL-NEXT: s_or_b32 s7, s16, s7 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s24, 16 +; GFX950-GISEL-NEXT: s_and_b32 s8, s8, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s26, s10, 16 +; GFX950-GISEL-NEXT: s_or_b32 s8, s16, s8 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s25, 16 +; GFX950-GISEL-NEXT: s_and_b32 s9, s9, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s27, s11, 16 +; GFX950-GISEL-NEXT: s_or_b32 s9, s16, s9 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s26, 16 +; GFX950-GISEL-NEXT: s_and_b32 s10, s10, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s28, s12, 16 +; GFX950-GISEL-NEXT: 
s_or_b32 s10, s16, s10 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s27, 16 +; GFX950-GISEL-NEXT: s_and_b32 s11, s11, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s29, s13, 16 +; GFX950-GISEL-NEXT: s_or_b32 s11, s16, s11 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s28, 16 +; GFX950-GISEL-NEXT: s_and_b32 s12, s12, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s30, s14, 16 +; GFX950-GISEL-NEXT: s_or_b32 s12, s16, s12 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s29, 16 +; GFX950-GISEL-NEXT: s_and_b32 s13, s13, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s31, s15, 16 +; GFX950-GISEL-NEXT: s_or_b32 s13, s16, s13 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s30, 16 +; GFX950-GISEL-NEXT: s_and_b32 s14, s14, 0xffff +; GFX950-GISEL-NEXT: s_or_b32 s14, s16, s14 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s31, 16 +; GFX950-GISEL-NEXT: s_and_b32 s15, s15, 0xffff +; GFX950-GISEL-NEXT: s_or_b32 s15, s16, s15 +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, 0x42c80000 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_bf6_bf16 v[2:7], v[2:17], v18 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float 100.0) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_bf6_f16_vv(<32 x half> %src, float %scale, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f16_vv: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v18 +; 
GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v17 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf6_f16 v[0:5], v[0:15], v16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[18:19], v[4:5], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f16_vv: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v17 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v18 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_bf6_f16 v[0:5], v[0:15], v16 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[20:21], v[0:3], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[20:21], v[4:5], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float %scale) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_bf6_f16_sl(<32 x half> inreg %src, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f16_sl: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, s2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, s3 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, s4 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, s5 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v8, s6 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v9, s7 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v10, s8 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v11, s9 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v12, s10 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v13, s11 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v14, s12 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v15, s13 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, s14 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, s15 +; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf6_f16 v[2:7], v[2:17], s0 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; 
GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f16_sl: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, 0x42c80000 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_bf6_f16 v[2:7], v[2:17], v18 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float 100.0) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_fp6_bf16_vv(<32 x bfloat> %src, float %scale, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_bf16_vv: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v18 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v17 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_fp6_bf16 v[0:5], v[0:15], v16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[18:19], v[4:5], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_bf16_vv: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v17 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v18 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v17, 16, v0 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v18, 16, v1 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v19, 16, v2 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v22, 16, v3 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v23, 16, v4 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v24, 16, v5 
+; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v25, 16, v6 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v26, 16, v7 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v27, 16, v8 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v28, 16, v9 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v29, 16, v10 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v30, 16, v11 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v31, 16, v12 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v32, 16, v13 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v33, 16, v14 +; GFX950-GISEL-NEXT: v_lshrrev_b32_e32 v34, 16, v15 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v0, v17 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v1, v18 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v2, v19 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v3, v22 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v4, v23 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v5, v24 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v6, v25 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v7, v26 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v8, v27 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v9, v28 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v10, v29 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v11, v30 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v12, v31 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v13, v32 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: 
v_mov_b32_sdwa v14, v33 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: v_mov_b32_sdwa v15, v34 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +; GFX950-GISEL-NEXT: s_nop 0 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_fp6_bf16 v[0:5], v[0:15], v16 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[20:21], v[0:3], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[20:21], v[4:5], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float %scale) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_fp6_bf16_sl(<32 x bfloat> inreg %src, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_bf16_sl: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, s2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, s3 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, s4 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, s5 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v8, s6 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v9, s7 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v10, s8 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v11, s9 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v12, s10 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v13, s11 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v14, s12 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v15, s13 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, s14 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, s15 +; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_fp6_bf16 v[2:7], v[2:17], s0 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_bf16_sl: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_lshr_b32 s16, s0, 16 +; GFX950-GISEL-NEXT: s_lshr_b32 s17, s1, 16 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, 
s16, 16 +; GFX950-GISEL-NEXT: s_and_b32 s0, s0, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s18, s2, 16 +; GFX950-GISEL-NEXT: s_or_b32 s0, s16, s0 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s17, 16 +; GFX950-GISEL-NEXT: s_and_b32 s1, s1, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s19, s3, 16 +; GFX950-GISEL-NEXT: s_or_b32 s1, s16, s1 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s18, 16 +; GFX950-GISEL-NEXT: s_and_b32 s2, s2, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s20, s4, 16 +; GFX950-GISEL-NEXT: s_or_b32 s2, s16, s2 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s19, 16 +; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s21, s5, 16 +; GFX950-GISEL-NEXT: s_or_b32 s3, s16, s3 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s20, 16 +; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s22, s6, 16 +; GFX950-GISEL-NEXT: s_or_b32 s4, s16, s4 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s21, 16 +; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s23, s7, 16 +; GFX950-GISEL-NEXT: s_or_b32 s5, s16, s5 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s22, 16 +; GFX950-GISEL-NEXT: s_and_b32 s6, s6, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s24, s8, 16 +; GFX950-GISEL-NEXT: s_or_b32 s6, s16, s6 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s23, 16 +; GFX950-GISEL-NEXT: s_and_b32 s7, s7, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s25, s9, 16 +; GFX950-GISEL-NEXT: s_or_b32 s7, s16, s7 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s24, 16 +; GFX950-GISEL-NEXT: s_and_b32 s8, s8, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s26, s10, 16 +; GFX950-GISEL-NEXT: s_or_b32 s8, s16, s8 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s25, 16 +; GFX950-GISEL-NEXT: s_and_b32 s9, s9, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s27, s11, 16 +; GFX950-GISEL-NEXT: s_or_b32 s9, s16, s9 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s26, 16 +; GFX950-GISEL-NEXT: s_and_b32 s10, s10, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s28, s12, 16 +; GFX950-GISEL-NEXT: s_or_b32 s10, s16, s10 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, 
s27, 16 +; GFX950-GISEL-NEXT: s_and_b32 s11, s11, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s29, s13, 16 +; GFX950-GISEL-NEXT: s_or_b32 s11, s16, s11 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s28, 16 +; GFX950-GISEL-NEXT: s_and_b32 s12, s12, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s30, s14, 16 +; GFX950-GISEL-NEXT: s_or_b32 s12, s16, s12 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s29, 16 +; GFX950-GISEL-NEXT: s_and_b32 s13, s13, 0xffff +; GFX950-GISEL-NEXT: s_lshr_b32 s31, s15, 16 +; GFX950-GISEL-NEXT: s_or_b32 s13, s16, s13 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s30, 16 +; GFX950-GISEL-NEXT: s_and_b32 s14, s14, 0xffff +; GFX950-GISEL-NEXT: s_or_b32 s14, s16, s14 +; GFX950-GISEL-NEXT: s_lshl_b32 s16, s31, 16 +; GFX950-GISEL-NEXT: s_and_b32 s15, s15, 0xffff +; GFX950-GISEL-NEXT: s_or_b32 s15, s16, s15 +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, 0x42c80000 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_fp6_bf16 v[2:7], v[2:17], v18 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float 100.0) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_fp6_f16_vv(<32 x half> %src, float %scale, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f16_vv: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v18 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v17 +; GFX950-SDAG-NEXT: 
v_cvt_scalef32_pk32_fp6_f16 v[0:5], v[0:15], v16 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[18:19], v[4:5], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f16_vv: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v17 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v18 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_fp6_f16 v[0:5], v[0:15], v16 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[20:21], v[0:3], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[20:21], v[4:5], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float %scale) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk32_fp6_f16_sl(<32 x half> inreg %src, ptr addrspace(1) %out) { +; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f16_sl: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, s2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, s3 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, s4 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, s5 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v8, s6 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v9, s7 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v10, s8 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v11, s9 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v12, s10 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v13, s11 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v14, s12 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v15, s13 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, s14 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, s15 +; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000 +; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_fp6_f16 v[2:7], v[2:17], s0 +; GFX950-SDAG-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: 
test_scalef32_pk32_fp6_f16_sl: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, 0x42c80000 +; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_fp6_f16 v[2:7], v[2:17], v18 +; GFX950-GISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX950-GISEL-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:16 +; GFX950-GISEL-NEXT: s_endpgm + %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float 100.0) + store <6 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll new file mode 100644 index 0000000000000..0689af0d56268 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.read.tr.gfx950.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX950-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX950-GISEL %s + +declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3)) +declare <2 x i32> @llvm.amdgcn.ds.read.tr8.b64.v2i32.p3(ptr addrspace(3)) +declare <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32.p3(ptr addrspace(3)) +declare <4 x i16> @llvm.amdgcn.ds.read.tr16.b64.v4i16.p3(ptr addrspace(3)) + +define amdgpu_ps void @ds_read_b64_tr_b4(ptr addrspace(3) %addr, ptr addrspace(1) %use) { +; GFX950-SDAG-LABEL:
ds_read_b64_tr_b4: +; GFX950-SDAG: ; %bb.0: ; %entry +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: ds_read_b64_tr_b4 v[0:1], v0 offset:32 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: ds_read_b64_tr_b4: +; GFX950-GISEL: ; %bb.0: ; %entry +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: ds_read_b64_tr_b4 v[0:1], v0 offset:32 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX950-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3) %gep) + store <2 x i32> %val, ptr addrspace(1) %use + ret void +} + +define amdgpu_ps void @ds_read_b96_tr_b6(ptr addrspace(3) %addr, ptr addrspace(1) %use) { +; GFX950-SDAG-LABEL: ds_read_b96_tr_b6: +; GFX950-SDAG: ; %bb.0: ; %entry +; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-SDAG-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: global_store_dwordx3 v[4:5], v[0:2], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: ds_read_b96_tr_b6: +; GFX950-GISEL: ; %bb.0: ; %entry +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: ds_read_b96_tr_b6 v[0:2], v0 offset:32 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: global_store_dwordx3 v[4:5], v[0:2], off +; GFX950-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %val = call <3 x i32> @llvm.amdgcn.ds.read.tr6.b96.v3i32.p3(ptr addrspace(3) %gep) + store <3 x i32> %val, ptr addrspace(1) %use + ret void +} + +define amdgpu_ps void @ds_read_b64_tr_b8(ptr addrspace(3) %addr, ptr 
addrspace(1) %use) { +; GFX950-SDAG-LABEL: ds_read_b64_tr_b8: +; GFX950-SDAG: ; %bb.0: ; %entry +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: ds_read_b64_tr_b8 v[0:1], v0 offset:32 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: ds_read_b64_tr_b8: +; GFX950-GISEL: ; %bb.0: ; %entry +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: ds_read_b64_tr_b8 v[0:1], v0 offset:32 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX950-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %val = call <2 x i32> @llvm.amdgcn.ds.read.tr8.b64.v2i32.p3(ptr addrspace(3) %gep) + store <2 x i32> %val, ptr addrspace(1) %use + ret void +} + +define amdgpu_ps void @ds_read_b64_tr_b16(ptr addrspace(3) %addr, ptr addrspace(1) %use) { +; GFX950-SDAG-LABEL: ds_read_b64_tr_b16: +; GFX950-SDAG: ; %bb.0: ; %entry +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: ds_read_b64_tr_b16 v[0:1], v0 offset:32 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: ds_read_b64_tr_b16: +; GFX950-GISEL: ; %bb.0: ; %entry +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: ds_read_b64_tr_b16 v[0:1], v0 offset:32 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX950-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4 + %val = call <4 x i16> @llvm.amdgcn.ds.read.tr16.b64.v4i16.p3(ptr addrspace(3) %gep) + store <4 x i16> %val, ptr addrspace(1) %use + ret void +} diff --git 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll index b0ef568fbdce3..42acf089e8648 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11 +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX950 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX950-ISEL declare float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %c, i1 %clamp) @@ -18,6 +20,38 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp( ; GFX11-NEXT: v_dot2_f32_bf16 v0, s2, s3, v0 clamp ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX950-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp: +; GFX950: ; %bb.0: ; %entry +; GFX950-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 +; GFX950-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-NEXT: s_load_dword s0, s[12:13], 0x0 +; GFX950-NEXT: s_load_dword s1, s[14:15], 0x0 +; GFX950-NEXT: s_load_dword s2, s[10:11], 0x0 +; GFX950-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-NEXT: v_mov_b32_e32 v1, s0 +; GFX950-NEXT: v_mov_b32_e32 v2, s1 +; GFX950-NEXT: v_dot2_f32_bf16 v1, s2, v1, v2 clamp +; GFX950-NEXT: s_nop 2 +; GFX950-NEXT: global_store_dword v0, v1, s[8:9] +; GFX950-NEXT: s_endpgm +; +; GFX950-ISEL-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp: +; GFX950-ISEL: ; %bb.0: ; %entry +; GFX950-ISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 +; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-ISEL-NEXT: s_load_dword s0, s[12:13], 0x0 
+; GFX950-ISEL-NEXT: s_load_dword s1, s[14:15], 0x0 +; GFX950-ISEL-NEXT: s_load_dword s2, s[10:11], 0x0 +; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX950-ISEL-NEXT: v_dot2_f32_bf16 v0, s2, v0, v1 clamp +; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX950-ISEL-NEXT: s_nop 1 +; GFX950-ISEL-NEXT: global_store_dword v1, v0, s[8:9] +; GFX950-ISEL-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b, @@ -46,6 +80,38 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_no_clamp( ; GFX11-NEXT: v_dot2_f32_bf16 v0, s2, s3, v0 ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX950-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_no_clamp: +; GFX950: ; %bb.0: ; %entry +; GFX950-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 +; GFX950-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-NEXT: s_load_dword s0, s[12:13], 0x0 +; GFX950-NEXT: s_load_dword s1, s[14:15], 0x0 +; GFX950-NEXT: s_load_dword s2, s[10:11], 0x0 +; GFX950-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-NEXT: v_mov_b32_e32 v1, s0 +; GFX950-NEXT: v_mov_b32_e32 v2, s1 +; GFX950-NEXT: v_dot2c_f32_bf16_e32 v2, s2, v1 +; GFX950-NEXT: s_nop 2 +; GFX950-NEXT: global_store_dword v0, v2, s[8:9] +; GFX950-NEXT: s_endpgm +; +; GFX950-ISEL-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_no_clamp: +; GFX950-ISEL: ; %bb.0: ; %entry +; GFX950-ISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 +; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-ISEL-NEXT: s_load_dword s0, s[12:13], 0x0 +; GFX950-ISEL-NEXT: s_load_dword s1, s[14:15], 0x0 +; GFX950-ISEL-NEXT: s_load_dword s2, s[10:11], 0x0 +; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX950-ISEL-NEXT: v_dot2c_f32_bf16_e32 v1, s2, v0 +; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX950-ISEL-NEXT: s_nop 1 +; GFX950-ISEL-NEXT: global_store_dword v0, v1, s[8:9] +; 
GFX950-ISEL-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll index d0ae669ffb3d6..5d149f7c0c62e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -enable-var-scope --check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=1 < %s | FileCheck -enable-var-scope --check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,SDAG %s +; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=1 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL %s declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half>, <8 x half>, <4 x float>, i32 immarg, i32 immarg, i32 immarg) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half>, <8 x half>, <16 x float>, i32 immarg, i32 immarg, i32 immarg) @@ -49,52 +49,366 @@ define <4 x float> @test_mfma_f32_16x16x32_f16__flags(<8 x half> %arg0, <8 x hal ret <4 x float> %result } -define <4 x float> @test_mfma_f32_16x16x32_f16__mac(<4 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_16x16x32_f16__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[4:7], v[8:11], a[0:3] -; GCN-NEXT: s_nop 6 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: 
s_setpc_b64 s[30:31] +define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrspace(1) %out, <8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2) #0 { +; SDAG-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0) - ret 
<4 x float> %result + store <4 x float> %result, ptr addrspace(1) %out + ret void } -define <4 x float> @test_mfma_f32_16x16x32_f16___flags__mac(<4 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_16x16x32_f16___flags__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[4:7], v[8:11], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 6 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: s_setpc_b64 s[30:31] - %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 1, i32 1, i32 1) - ret <4 x float> %result +define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr addrspace(1) %out, <8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2) #0 { +; SDAG-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] +; SDAG-NEXT: s_endpgm +; +; 
GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GISEL-NEXT: s_endpgm + %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1) + store <4 x float> %result, ptr addrspace(1) %out + ret void } ; -------------------------------------------------------------------- ; llvm.amdgcn.mfma.f32.32x32x16.f16 ; -------------------------------------------------------------------- -define <16 x float> @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16: +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) #1 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 48 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 32 +; SDAG-NEXT: v_mov_b64_e32 v[16:17], 16 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: 
v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[18:19], 0 +; SDAG-NEXT: v_mov_b32_e32 v8, s16 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: v_mov_b32_e32 v9, s17 +; SDAG-NEXT: v_mov_b32_e32 v10, s18 +; SDAG-NEXT: v_mov_b32_e32 v11, s19 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[16:17], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[18:19], a[16:19], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[8:11], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; 
SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: 
s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm + %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) + store volatile <16 x float> %result, ptr addrspace(1) null + store volatile <16 x float> %arg2, ptr addrspace(1) null + ret void +} + +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) #1 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16__flags: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 48 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 32 +; SDAG-NEXT: v_mov_b64_e32 v[16:17], 16 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 
a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[18:19], 0 +; SDAG-NEXT: v_mov_b32_e32 v8, s16 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: v_mov_b32_e32 v9, s17 +; SDAG-NEXT: v_mov_b32_e32 v10, s18 +; SDAG-NEXT: v_mov_b32_e32 v11, s19 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[16:17], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[18:19], a[16:19], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[8:11], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt 
vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16__flags: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm + %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 2, i32 3, i32 1) + store volatile <16 x float> %result, ptr addrspace(1) null + store volatile <16 x float> %arg2, ptr addrspace(1) null + ret void +} + +define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { +; GCN-LABEL: test_mfma_f32_32x32x16_f16__mac: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_accvgpr_write_b32 a0, v8 @@ -138,8 +452,8 @@ define <16 x float> @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x half> %ar ret <16 x float> %result } -define <16 x float> @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16__flags: +define <16 x float> @test_mfma_f32_32x32x16_f16__mac__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { +; GCN-LABEL: test_mfma_f32_32x32x16_f16__mac__flags: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_accvgpr_write_b32 a0, v8 @@ -183,155 +497,327 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <8 x ha ret <16 x float> %result } -define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<16 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; 
GCN-LABEL: test_mfma_f32_32x32x16_f16__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_accvgpr_write_b32 a5, v5 -; GCN-NEXT: v_accvgpr_write_b32 a6, v6 -; GCN-NEXT: v_accvgpr_write_b32 a7, v7 -; GCN-NEXT: v_accvgpr_write_b32 a8, v8 -; GCN-NEXT: v_accvgpr_write_b32 a9, v9 -; GCN-NEXT: v_accvgpr_write_b32 a10, v10 -; GCN-NEXT: v_accvgpr_write_b32 a11, v11 -; GCN-NEXT: v_accvgpr_write_b32 a12, v12 -; GCN-NEXT: v_accvgpr_write_b32 a13, v13 -; GCN-NEXT: v_accvgpr_write_b32 a14, v14 -; GCN-NEXT: v_accvgpr_write_b32 a15, v15 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[16:19], v[20:23], a[0:15] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: 
s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 +; SDAG-NEXT: v_mov_b32_e32 v9, s21 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31] +; SDAG-NEXT: v_mov_b32_e32 v10, s22 +; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; 
SDAG-NEXT: global_store_dwordx4 v12, a[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: 
v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) - ret <16 x float> %result + store volatile <16 x float> %arg2, ptr addrspace(1) %out + store volatile <16 x float> %result, ptr addrspace(1) %out + ret void } -define <16 x float> @test_mfma_f32_32x32x16_f16__flags__mac(<16 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16__flags__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_accvgpr_write_b32 a5, v5 -; GCN-NEXT: v_accvgpr_write_b32 a6, v6 -; GCN-NEXT: v_accvgpr_write_b32 a7, v7 -; GCN-NEXT: v_accvgpr_write_b32 a8, v8 -; GCN-NEXT: v_accvgpr_write_b32 a9, v9 -; GCN-NEXT: v_accvgpr_write_b32 a10, v10 -; GCN-NEXT: v_accvgpr_write_b32 a11, v11 -; 
GCN-NEXT: v_accvgpr_write_b32 a12, v12 -; GCN-NEXT: v_accvgpr_write_b32 a13, v13 -; GCN-NEXT: v_accvgpr_write_b32 a14, v14 -; GCN-NEXT: v_accvgpr_write_b32 a15, v15 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[16:19], v[20:23], a[0:15] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 1, i32 1, i32 1) - ret <16 x float> %result +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: 
v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 +; SDAG-NEXT: v_mov_b32_e32 v9, s21 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3 +; SDAG-NEXT: v_mov_b32_e32 v10, s22 +; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[4:7], s[0:1] 
offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:1 abid:2 blgp:3 +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], 
s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm + %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 1, i32 2, i32 3) + store volatile <16 x float> %arg2, ptr addrspace(1) %out + store volatile <16 x float> %result, ptr addrspace(1) %out + ret void } define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; 
SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: 
v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] -; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: 
global_store_dwordx4 v16, v[0:3], s[0:1] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) store <16 x float> %result, ptr addrspace(1) %out @@ -341,62 +827,76 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], 
s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 
v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 
0 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 3, i32 2, i32 1) store <16 x float> %result, ptr addrspace(1) %out @@ -419,7 +919,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -439,7 +939,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -452,50 +952,48 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspace(1) %out, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2) #0 { ; SDAG-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx4 s[12:15], 
s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 -; SDAG-NEXT: v_mov_b32_e32 v4, s8 -; SDAG-NEXT: v_mov_b32_e32 v5, s9 -; SDAG-NEXT: v_mov_b32_e32 v6, s10 -; SDAG-NEXT: v_mov_b32_e32 v7, s11 -; SDAG-NEXT: v_mov_b32_e32 v8, s12 -; SDAG-NEXT: v_mov_b32_e32 v9, s13 -; SDAG-NEXT: v_mov_b32_e32 v10, s14 -; SDAG-NEXT: v_mov_b32_e32 v11, s15 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: v_mov_b32_e32 v4, s12 +; SDAG-NEXT: v_mov_b32_e32 v5, s13 +; SDAG-NEXT: v_mov_b32_e32 v6, s14 +; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[2:3] +; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; 
GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 0, i32 0, i32 0) store <4 x i32> %result, ptr addrspace(1) %out @@ -505,50 +1003,48 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr addrspace(1) %out, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2) #0 { ; SDAG-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; 
SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 -; SDAG-NEXT: v_mov_b32_e32 v4, s8 -; SDAG-NEXT: v_mov_b32_e32 v5, s9 -; SDAG-NEXT: v_mov_b32_e32 v6, s10 -; SDAG-NEXT: v_mov_b32_e32 v7, s11 -; SDAG-NEXT: v_mov_b32_e32 v8, s12 -; SDAG-NEXT: v_mov_b32_e32 v9, s13 -; SDAG-NEXT: v_mov_b32_e32 v10, s14 -; SDAG-NEXT: v_mov_b32_e32 v11, s15 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: v_mov_b32_e32 v4, s12 +; SDAG-NEXT: v_mov_b32_e32 v5, s13 +; SDAG-NEXT: v_mov_b32_e32 v6, s14 +; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[2:3] +; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: 
v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 3, i32 2, i32 1) store <4 x i32> %result, ptr addrspace(1) %out @@ -564,137 +1060,131 @@ declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32>, <4 x i32>, <16 x define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2) #1 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[8:9], 16 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[8:9], 48 +; SDAG-NEXT: v_mov_b64_e32 v[10:11], 32 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 16 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s20 -; SDAG-NEXT: v_accvgpr_write_b32 a31, s19 -; SDAG-NEXT: v_mov_b32_e32 v1, s21 -; SDAG-NEXT: v_mov_b32_e32 v2, s22 -; SDAG-NEXT: v_mov_b32_e32 v3, s23 -; SDAG-NEXT: v_mov_b32_e32 v4, s24 -; SDAG-NEXT: v_mov_b32_e32 v5, s25 -; SDAG-NEXT: v_mov_b32_e32 v6, s26 
-; SDAG-NEXT: v_mov_b32_e32 v7, s27 -; SDAG-NEXT: v_accvgpr_write_b32 a30, s18 -; SDAG-NEXT: v_accvgpr_write_b32 a29, s17 -; SDAG-NEXT: v_accvgpr_write_b32 a28, s16 -; SDAG-NEXT: v_accvgpr_write_b32 a27, s15 -; SDAG-NEXT: v_accvgpr_write_b32 a26, s14 -; SDAG-NEXT: v_accvgpr_write_b32 a25, s13 -; SDAG-NEXT: v_accvgpr_write_b32 a24, s12 -; SDAG-NEXT: v_accvgpr_write_b32 a23, s11 -; SDAG-NEXT: v_accvgpr_write_b32 a22, s10 -; SDAG-NEXT: v_accvgpr_write_b32 a21, s9 -; SDAG-NEXT: v_accvgpr_write_b32 a20, s8 -; SDAG-NEXT: v_accvgpr_write_b32 a19, s7 -; SDAG-NEXT: v_accvgpr_write_b32 a18, s6 -; SDAG-NEXT: v_accvgpr_write_b32 a17, s5 -; SDAG-NEXT: v_accvgpr_write_b32 a16, s4 -; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], 48 -; SDAG-NEXT: v_mov_b64_e32 v[6:7], 32 -; SDAG-NEXT: v_mov_b32_e32 v0, s12 -; SDAG-NEXT: v_mov_b32_e32 v1, s13 -; SDAG-NEXT: v_mov_b32_e32 v2, s14 -; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 4 -; SDAG-NEXT: global_store_dwordx4 v[4:5], a[12:15], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[8:9], a[4:7], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], a[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v6, s30 +; SDAG-NEXT: v_mov_b32_e32 v7, s31 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: 
v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 0 ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] ; SDAG-NEXT: v_mov_b32_e32 v0, s16 ; SDAG-NEXT: v_mov_b32_e32 v1, s17 ; SDAG-NEXT: v_mov_b32_e32 v2, s18 ; SDAG-NEXT: v_mov_b32_e32 v3, s19 -; SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 ; SDAG-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 ; SDAG-NEXT: v_mov_b32_e32 v3, s11 -; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; 
SDAG-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: 
v_mov_b64_e32 v[4:5], s[24:25] -; GISEL-NEXT: v_accvgpr_write_b32 a31, s19 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[26:27] -; GISEL-NEXT: v_accvgpr_write_b32 a30, s18 -; GISEL-NEXT: v_accvgpr_write_b32 a29, s17 -; GISEL-NEXT: v_accvgpr_write_b32 a28, s16 -; GISEL-NEXT: v_accvgpr_write_b32 a27, s15 -; GISEL-NEXT: v_accvgpr_write_b32 a26, s14 -; GISEL-NEXT: v_accvgpr_write_b32 a25, s13 -; GISEL-NEXT: v_accvgpr_write_b32 a24, s12 -; GISEL-NEXT: v_accvgpr_write_b32 a23, s11 -; GISEL-NEXT: v_accvgpr_write_b32 a22, s10 -; GISEL-NEXT: v_accvgpr_write_b32 a21, s9 -; GISEL-NEXT: v_accvgpr_write_b32 a20, s8 -; GISEL-NEXT: v_accvgpr_write_b32 a19, s7 -; GISEL-NEXT: v_accvgpr_write_b32 a18, s6 -; GISEL-NEXT: v_accvgpr_write_b32 a17, s5 -; GISEL-NEXT: v_accvgpr_write_b32 a16, s4 -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 16 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 32 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[0:3], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], 
a[28:31], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 0, i32 0, i32 0) @@ -706,137 +1196,131 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32> define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2) #1 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[8:9], 16 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[8:9], 48 +; SDAG-NEXT: v_mov_b64_e32 v[10:11], 32 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 16 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s20 -; SDAG-NEXT: 
v_accvgpr_write_b32 a31, s19 -; SDAG-NEXT: v_mov_b32_e32 v1, s21 -; SDAG-NEXT: v_mov_b32_e32 v2, s22 -; SDAG-NEXT: v_mov_b32_e32 v3, s23 -; SDAG-NEXT: v_mov_b32_e32 v4, s24 -; SDAG-NEXT: v_mov_b32_e32 v5, s25 -; SDAG-NEXT: v_mov_b32_e32 v6, s26 -; SDAG-NEXT: v_mov_b32_e32 v7, s27 -; SDAG-NEXT: v_accvgpr_write_b32 a30, s18 -; SDAG-NEXT: v_accvgpr_write_b32 a29, s17 -; SDAG-NEXT: v_accvgpr_write_b32 a28, s16 -; SDAG-NEXT: v_accvgpr_write_b32 a27, s15 -; SDAG-NEXT: v_accvgpr_write_b32 a26, s14 -; SDAG-NEXT: v_accvgpr_write_b32 a25, s13 -; SDAG-NEXT: v_accvgpr_write_b32 a24, s12 -; SDAG-NEXT: v_accvgpr_write_b32 a23, s11 -; SDAG-NEXT: v_accvgpr_write_b32 a22, s10 -; SDAG-NEXT: v_accvgpr_write_b32 a21, s9 -; SDAG-NEXT: v_accvgpr_write_b32 a20, s8 -; SDAG-NEXT: v_accvgpr_write_b32 a19, s7 -; SDAG-NEXT: v_accvgpr_write_b32 a18, s6 -; SDAG-NEXT: v_accvgpr_write_b32 a17, s5 -; SDAG-NEXT: v_accvgpr_write_b32 a16, s4 -; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:2 abid:3 blgp:1 -; SDAG-NEXT: v_mov_b64_e32 v[4:5], 48 -; SDAG-NEXT: v_mov_b64_e32 v[6:7], 32 -; SDAG-NEXT: v_mov_b32_e32 v0, s12 -; SDAG-NEXT: v_mov_b32_e32 v1, s13 -; SDAG-NEXT: v_mov_b32_e32 v2, s14 -; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 4 -; SDAG-NEXT: global_store_dwordx4 v[4:5], a[12:15], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[8:9], a[4:7], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], a[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b32_e32 
v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v6, s30 +; SDAG-NEXT: v_mov_b32_e32 v7, s31 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 0 ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 ; SDAG-NEXT: v_mov_b32_e32 v0, s16 ; SDAG-NEXT: v_mov_b32_e32 v1, s17 ; SDAG-NEXT: v_mov_b32_e32 v2, s18 ; SDAG-NEXT: v_mov_b32_e32 v3, s19 -; SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 ; SDAG-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], 
off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 ; SDAG-NEXT: v_mov_b32_e32 v3, s11 -; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 
a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[24:25] -; GISEL-NEXT: v_accvgpr_write_b32 a31, s19 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[26:27] -; GISEL-NEXT: v_accvgpr_write_b32 a30, s18 -; GISEL-NEXT: v_accvgpr_write_b32 a29, s17 -; GISEL-NEXT: v_accvgpr_write_b32 a28, s16 -; GISEL-NEXT: v_accvgpr_write_b32 a27, s15 -; GISEL-NEXT: v_accvgpr_write_b32 a26, s14 -; GISEL-NEXT: v_accvgpr_write_b32 a25, s13 -; GISEL-NEXT: v_accvgpr_write_b32 a24, s12 -; GISEL-NEXT: v_accvgpr_write_b32 a23, s11 -; GISEL-NEXT: v_accvgpr_write_b32 a22, s10 -; GISEL-NEXT: v_accvgpr_write_b32 a21, s9 -; GISEL-NEXT: v_accvgpr_write_b32 a20, s8 -; GISEL-NEXT: v_accvgpr_write_b32 a19, s7 -; GISEL-NEXT: v_accvgpr_write_b32 a18, s6 -; GISEL-NEXT: v_accvgpr_write_b32 a17, s5 -; GISEL-NEXT: v_accvgpr_write_b32 a16, s4 -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:2 abid:3 blgp:1 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 16 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 32 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[0:3], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 
sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 2, i32 3, i32 1) @@ -867,7 +1351,8 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac(<4 x i32> %arg0, <4 x i32> %ar ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -911,7 +1396,8 @@ define <16 x i32> 
@test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -936,116 +1422,126 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v40, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v32, s16 -; SDAG-NEXT: v_mov_b32_e32 v33, s17 -; SDAG-NEXT: v_mov_b32_e32 v34, s18 -; SDAG-NEXT: v_mov_b32_e32 v35, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v36, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v37, s21 -; SDAG-NEXT: v_mov_b32_e32 v38, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; SDAG-NEXT: v_mov_b32_e32 v39, s23 -; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 
v[20:21], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[4:5] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s12 -; SDAG-NEXT: v_mov_b32_e32 v17, s13 -; SDAG-NEXT: v_mov_b32_e32 v18, s14 -; SDAG-NEXT: v_mov_b32_e32 v19, s15 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; 
SDAG-NEXT: v_mov_b32_e32 v16, s8 -; SDAG-NEXT: v_mov_b32_e32 v17, s9 -; SDAG-NEXT: v_mov_b32_e32 v18, s10 -; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s4 -; SDAG-NEXT: v_mov_b32_e32 v17, s5 -; SDAG-NEXT: v_mov_b32_e32 v18, s6 -; SDAG-NEXT: v_mov_b32_e32 v19, s7 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[4:5] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: v_mov_b32_e32 v20, 0 -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b64_e32 
v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[0:3], s[2:3] sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[4:7], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[8:11], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[12:15], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: 
s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 0, i32 0, i32 0) @@ -1057,116 +1553,126 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4 define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v40, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v32, s16 -; SDAG-NEXT: v_mov_b32_e32 v33, s17 -; SDAG-NEXT: v_mov_b32_e32 v34, s18 -; SDAG-NEXT: v_mov_b32_e32 v35, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v36, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v37, s21 -; SDAG-NEXT: v_mov_b32_e32 v38, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; SDAG-NEXT: v_mov_b32_e32 v39, s23 -; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[4:5] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 
+; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s12 -; SDAG-NEXT: v_mov_b32_e32 v17, s13 -; SDAG-NEXT: v_mov_b32_e32 v18, s14 -; SDAG-NEXT: v_mov_b32_e32 v19, s15 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s8 -; SDAG-NEXT: v_mov_b32_e32 v17, s9 -; SDAG-NEXT: v_mov_b32_e32 v18, s10 -; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] 
offset:16 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s4 -; SDAG-NEXT: v_mov_b32_e32 v17, s5 -; SDAG-NEXT: v_mov_b32_e32 v18, s6 -; SDAG-NEXT: v_mov_b32_e32 v19, s7 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[38:39], 
s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[4:5] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: v_mov_b32_e32 v20, 0 -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:1 abid:2 blgp:3 +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], 
s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[0:3], s[2:3] sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[4:7], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[8:11], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[12:15], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 1, i32 2, i32 3) @@ -1178,67 
+1684,81 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v24, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v20, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v21, s21 -; SDAG-NEXT: v_mov_b32_e32 v22, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b32_e32 v23, s23 -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; 
SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] -; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v24, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; 
GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] -; GISEL-NEXT: v_mov_b32_e32 v16, 0 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[2:3] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[2:3] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[2:3] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[2:3] offset:48 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 0, i32 0, i32 0) store <16 
x i32> %result, ptr addrspace(1) %out @@ -1248,67 +1768,81 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0 define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v24, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v20, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v21, s21 -; SDAG-NEXT: v_mov_b32_e32 v22, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b32_e32 v23, s23 -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: 
v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v24, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: 
v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v16, 0 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[2:3] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[2:3] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[2:3] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[2:3] offset:48 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 
; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 3, i32 2, i32 1) store <16 x i32> %result, ptr addrspace(1) %out @@ -1428,41 +1962,44 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16__flags(<8 x bfloat> %arg0, <8 x define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrspace(1) %out, <8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2) #0 { ; SDAG-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] +; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] ; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 
0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: s_nop 4 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0) store <4 x float> %result, ptr addrspace(1) %out @@ -1472,41 +2009,44 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(ptr addrspace(1) %out, <8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2) #0 { ; SDAG-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx4 
s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 ; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; 
GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: s_nop 4 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1) store <4 x float> %result, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll index 1e0a0bf2ca9d9..9a8282231ac15 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll @@ -23,7 +23,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -46,7 +47,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -69,7 +71,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -92,7 +95,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -115,7 +119,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -138,7 +143,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, 
a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -161,7 +167,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -184,7 +191,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -208,7 +216,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -232,7 +241,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -256,7 +266,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], 
v[0:7], v[8:15], a[0:3] blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -280,7 +291,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -304,7 +316,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -328,7 +341,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -352,7 +366,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -376,7 +391,8 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -400,7 +416,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -424,7 +441,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -448,7 +466,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -472,7 +491,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:1 blgp:1 -; GCN-NEXT: s_nop 3 +; 
GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -497,7 +517,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -521,7 +542,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -544,7 +566,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -568,7 +591,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -592,7 +616,8 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -616,7 +641,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -640,7 +666,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -664,7 +691,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -688,7 +716,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 
+; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -712,7 +741,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -736,7 +766,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -760,7 +791,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -784,7 +815,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -808,7 +839,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; 
GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -832,7 +863,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -857,7 +888,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -881,7 +913,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -905,7 +938,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -929,7 +963,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -953,7 +988,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -977,7 +1012,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1001,7 +1036,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1025,7 +1060,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; 
GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1049,7 +1084,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1073,7 +1108,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1097,7 +1132,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1121,7 +1156,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1145,7 +1180,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1169,7 +1205,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1193,7 +1230,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1217,7 +1255,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1241,7 +1280,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -1265,7 +1304,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1289,7 +1328,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1313,7 +1352,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1337,7 +1376,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3], v12, v13 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1361,7 +1400,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; 
GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1389,7 +1428,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: v_mov_b32_e32 v16, s1 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1409,7 +1449,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v20 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1429,7 +1470,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, s0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1443,6 +1485,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v12, s0 +; SDAG-NEXT: v_mov_b32_e32 v13, s1 +; SDAG-NEXT: v_mov_b32_e32 v14, s2 +; SDAG-NEXT: v_mov_b32_e32 v15, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v17, s17 +; SDAG-NEXT: v_mov_b32_e32 v18, s18 +; SDAG-NEXT: v_mov_b32_e32 v19, s19 ; SDAG-NEXT: v_mov_b32_e32 
v20, s28 ; SDAG-NEXT: v_mov_b32_e32 v23, v1 ; SDAG-NEXT: v_mov_b32_e32 v22, v0 @@ -1456,20 +1506,13 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_mov_b32_e32 v9, s25 ; SDAG-NEXT: v_mov_b32_e32 v10, s26 ; SDAG-NEXT: v_mov_b32_e32 v11, s27 -; SDAG-NEXT: v_mov_b32_e32 v12, s0 -; SDAG-NEXT: v_mov_b32_e32 v13, s1 -; SDAG-NEXT: v_mov_b32_e32 v14, s2 -; SDAG-NEXT: v_mov_b32_e32 v15, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v21 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v22 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[12:19], v[4:11], a[0:3], v2, v3 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1481,18 +1524,18 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s12, s0 ; GISEL-NEXT: s_mov_b32 s13, s1 -; GISEL-NEXT: v_mov_b32_e32 v20, s28 ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v20, s28 ; GISEL-NEXT: v_mov_b32_e32 v22, v0 ; GISEL-NEXT: v_mov_b32_e32 v23, v1 ; GISEL-NEXT: v_mov_b32_e32 v21, s29 -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_accvgpr_write_b32 a0, v20 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; 
GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27] @@ -1501,7 +1544,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_accvgpr_write_b32 a3, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[4:11], v[12:19], a[0:3], v2, v3 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1515,7 +1559,6 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgpr_vgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v14, s0 ; SDAG-NEXT: v_mov_b32_e32 v15, s1 ; SDAG-NEXT: v_mov_b32_e32 v16, s2 @@ -1524,12 +1567,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_mov_b32_e32 v19, s17 ; SDAG-NEXT: v_mov_b32_e32 v20, s18 ; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1544,16 +1589,17 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] ; 
GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1567,7 +1613,6 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgpr_sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v14, s0 ; SDAG-NEXT: v_mov_b32_e32 v15, s1 ; SDAG-NEXT: v_mov_b32_e32 v16, s2 @@ -1576,12 +1621,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_mov_b32_e32 v19, s17 ; SDAG-NEXT: v_mov_b32_e32 v20, s18 ; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1596,16 +1643,17 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] ; GISEL-NEXT: 
v_mov_b64_e32 v[14:15], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1633,7 +1681,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1657,7 +1706,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1677,7 +1727,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GCN-NEXT: v_accvgpr_write_b32 a3, s3 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, s16 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1705,7 +1756,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; 
SDAG-NEXT: v_accvgpr_write_b32 a3, s23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1720,16 +1772,17 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[12:13] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s20 ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[18:19] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s20 ; GISEL-NEXT: v_accvgpr_write_b32 a1, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a2, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a3, s23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1749,7 +1802,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1770,7 +1824,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; 
SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1787,7 +1842,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_mov_b32_e32 v16, 0x41 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1809,7 +1865,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1827,7 +1884,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1868,7 +1926,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; SDAG-NEXT: v_mov_b32_e32 v17, s13 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15] ; SDAG-NEXT: s_endpgm ; @@ -1878,11 +1937,11 @@ define amdgpu_kernel void 
@test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x40 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s24 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s24 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -1893,7 +1952,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[30:31] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 2, i32 3, i32 %scale0, i32 1, i32 %scale1) @@ -1904,13 +1964,12 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, ptr addrspace(1) %ptr) #0 { ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 ; SDAG-NEXT: s_movk_i32 s6, 0x41 ; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 ; 
SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 @@ -1919,6 +1978,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_mov_b32_e32 v5, s13 ; SDAG-NEXT: v_mov_b32_e32 v6, s14 ; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 ; SDAG-NEXT: v_mov_b32_e32 v8, s16 ; SDAG-NEXT: v_mov_b32_e32 v9, s17 ; SDAG-NEXT: v_mov_b32_e32 v10, s18 @@ -1932,7 +1992,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5] ; SDAG-NEXT: s_endpgm ; @@ -1940,15 +2001,15 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 -; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 ; GISEL-NEXT: v_mov_b32_e32 v16, 0x41 +; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -1958,8 +2019,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0] ; 
GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 -; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 -2) store <4 x float> %result, ptr addrspace(1) %ptr, align 16 @@ -1977,7 +2039,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_a( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1998,7 +2061,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_b( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2018,7 +2082,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1(<8 ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 1 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2038,7 +2103,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 1, 0 
op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2062,7 +2128,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_fp6( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2085,7 +2152,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp8( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2108,7 +2176,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2131,7 +2199,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2154,7 +2222,8 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2177,7 +2246,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp8( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2200,7 +2270,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v6i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2223,7 +2294,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v6i32_fp4__v8i32_fp8( ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2246,7 +2318,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], 
v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2269,7 +2341,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll index 426764d91b8a1..05f8739e7cb89 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll @@ -38,7 +38,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -81,7 +83,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -131,7 +135,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -174,7 +180,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -224,7 +232,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -267,7 +277,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -317,7 +329,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; 
SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -360,7 +374,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -410,7 +426,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -453,7 +471,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -503,7 +523,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; 
SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -546,7 +568,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -596,7 +620,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -639,7 +665,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -689,7 +717,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: 
v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -732,7 +762,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -781,7 +813,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -832,7 +866,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:1 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -875,7 +911,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:1 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -923,7 +961,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -973,7 +1013,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp2(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1020,7 +1062,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1070,7 +1114,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp3(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1117,7 +1163,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] 
blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1165,7 +1213,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1212,7 +1262,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1263,7 +1315,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:1 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1306,7 +1360,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:1 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, 
a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1354,7 +1410,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1405,7 +1463,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:1 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1448,7 +1508,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:1 blgp:1 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1497,7 +1559,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1547,7 +1611,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp2(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1594,7 +1660,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1644,7 +1712,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp3(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1691,7 +1761,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1739,7 +1811,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1786,7 +1860,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1836,7 +1912,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp0(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1883,7 +1961,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1933,7 +2013,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp1(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 
+; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1980,7 +2062,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2028,7 +2112,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2075,7 +2160,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2123,7 +2209,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2170,7 +2257,8 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2221,7 +2309,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp0(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2268,7 +2358,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2318,7 +2410,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp1(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2365,7 +2459,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], 
a[0:15] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2413,7 +2509,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2460,7 +2557,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2508,7 +2606,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2555,7 +2654,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ 
-2603,7 +2703,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2650,7 +2751,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2698,7 +2800,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2745,7 +2848,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2793,7 +2897,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp0(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], 
v[4:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2840,7 +2946,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2888,7 +2996,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp1(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2935,7 +3045,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2983,7 +3095,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp2(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 
v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3030,7 +3143,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3078,7 +3192,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp3(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3125,7 +3240,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3173,7 +3289,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp4(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:7], a[0:15], v24, v25 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3220,7 +3337,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, 
v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3273,7 +3391,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3319,7 +3439,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3365,7 +3487,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, s0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3391,6 +3515,14 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v32, s0 +; SDAG-NEXT: v_mov_b32_e32 v33, s1 +; SDAG-NEXT: v_mov_b32_e32 
v34, s2 +; SDAG-NEXT: v_mov_b32_e32 v35, s3 +; SDAG-NEXT: v_mov_b32_e32 v36, s16 +; SDAG-NEXT: v_mov_b32_e32 v37, s17 +; SDAG-NEXT: v_mov_b32_e32 v38, s18 +; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_mov_b32_e32 v16, s28 ; SDAG-NEXT: v_mov_b32_e32 v31, v13 ; SDAG-NEXT: v_mov_b32_e32 v30, v12 @@ -3416,14 +3548,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_mov_b32_e32 v5, s25 ; SDAG-NEXT: v_mov_b32_e32 v6, s26 ; SDAG-NEXT: v_mov_b32_e32 v7, s27 -; SDAG-NEXT: v_mov_b32_e32 v32, s0 -; SDAG-NEXT: v_mov_b32_e32 v33, s1 -; SDAG-NEXT: v_mov_b32_e32 v34, s2 -; SDAG-NEXT: v_mov_b32_e32 v35, s3 -; SDAG-NEXT: v_mov_b32_e32 v36, s16 -; SDAG-NEXT: v_mov_b32_e32 v37, s17 -; SDAG-NEXT: v_mov_b32_e32 v38, s18 -; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 @@ -3441,7 +3565,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_accvgpr_write_b32 a15, v31 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3465,7 +3591,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s12, s0 ; GISEL-NEXT: s_mov_b32 s13, s1 -; GISEL-NEXT: v_mov_b32_e32 v16, s28 ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b32_e32 v18, v0 @@ -3476,6 +3601,11 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_mov_b32_e32 v23, v5 ; GISEL-NEXT: v_mov_b32_e32 v24, v6 ; GISEL-NEXT: v_mov_b32_e32 v25, v7 +; GISEL-NEXT: 
v_mov_b64_e32 v[0:1], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v16, s28 ; GISEL-NEXT: v_mov_b32_e32 v26, v8 ; GISEL-NEXT: v_mov_b32_e32 v27, v9 ; GISEL-NEXT: v_mov_b32_e32 v28, v10 @@ -3483,12 +3613,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_mov_b32_e32 v30, v12 ; GISEL-NEXT: v_mov_b32_e32 v31, v13 ; GISEL-NEXT: v_mov_b32_e32 v17, s29 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[26:27] ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[24:25] ; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[20:21] @@ -3509,7 +3635,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_accvgpr_write_b32 a15, v31 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[32:39], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3535,7 +3663,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgpr_vgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v26, s0 ; SDAG-NEXT: v_mov_b32_e32 v27, s1 ; SDAG-NEXT: v_mov_b32_e32 v28, s2 @@ -3544,6 +3671,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_mov_b32_e32 v31, s17 ; SDAG-NEXT: 
v_mov_b32_e32 v32, s18 ; SDAG-NEXT: v_mov_b32_e32 v33, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3561,7 +3689,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3588,10 +3718,10 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3609,7 +3739,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3635,7 +3767,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgpr_sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v26, s0 ; SDAG-NEXT: v_mov_b32_e32 v27, s1 ; SDAG-NEXT: v_mov_b32_e32 v28, s2 @@ -3644,6 +3775,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_mov_b32_e32 v31, s17 ; SDAG-NEXT: v_mov_b32_e32 v32, s18 ; SDAG-NEXT: v_mov_b32_e32 v33, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3661,7 +3793,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3688,10 +3822,10 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3709,7 +3843,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; 
GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3761,7 +3897,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3809,7 +3947,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3853,7 +3993,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, s27 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3897,7 +4039,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, s27 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -3923,6 +4067,14 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgpr_sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v32, s0 +; SDAG-NEXT: v_mov_b32_e32 v33, s1 +; SDAG-NEXT: v_mov_b32_e32 v34, s2 +; SDAG-NEXT: v_mov_b32_e32 v35, s3 +; SDAG-NEXT: v_mov_b32_e32 v36, s16 +; SDAG-NEXT: v_mov_b32_e32 v37, s17 +; SDAG-NEXT: v_mov_b32_e32 v38, s18 +; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_mov_b32_e32 v16, s20 ; SDAG-NEXT: v_mov_b32_e32 v31, v13 ; SDAG-NEXT: v_mov_b32_e32 v30, v12 @@ -3940,14 +4092,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_mov_b32_e32 v24, s28 ; SDAG-NEXT: v_mov_b32_e32 v25, s29 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 -; SDAG-NEXT: v_mov_b32_e32 v32, s0 -; SDAG-NEXT: v_mov_b32_e32 v33, s1 -; SDAG-NEXT: v_mov_b32_e32 v34, s2 -; SDAG-NEXT: v_mov_b32_e32 v35, s3 -; SDAG-NEXT: v_mov_b32_e32 v36, s16 -; SDAG-NEXT: v_mov_b32_e32 v37, s17 -; SDAG-NEXT: v_mov_b32_e32 v38, s18 -; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 @@ -3965,7 +4109,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v31 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3987,11 +4133,15 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgpr_sgpr: ; GISEL: ; %bb.0: ; 
GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v16, s20 ; GISEL-NEXT: s_mov_b32 s12, s0 ; GISEL-NEXT: s_mov_b32 s13, s1 ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 +; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[12:13] +; GISEL-NEXT: v_mov_b32_e32 v16, s20 ; GISEL-NEXT: v_mov_b32_e32 v26, v8 ; GISEL-NEXT: v_mov_b32_e32 v27, v9 ; GISEL-NEXT: v_mov_b32_e32 v28, v10 @@ -4007,11 +4157,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_mov_b32_e32 v23, s27 ; GISEL-NEXT: v_mov_b32_e32 v24, s28 ; GISEL-NEXT: v_mov_b32_e32 v25, s29 -; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[18:19] ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 -; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[12:13] ; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 @@ -4029,7 +4175,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v31 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4074,7 +4222,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 33, -2 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, 
a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4120,7 +4270,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, -2 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4162,7 +4314,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, -2 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4209,7 +4363,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4252,7 +4408,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4280,7 +4438,6 @@ define 
amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_accvgpr_write_b32 a0, s36 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 @@ -4314,14 +4471,17 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; SDAG-NEXT: v_accvgpr_write_b32 a13, s49 ; SDAG-NEXT: v_accvgpr_write_b32 a14, s50 ; SDAG-NEXT: v_accvgpr_write_b32 a15, s51 -; SDAG-NEXT: v_mov_b32_e32 v17, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s1 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v17 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v16, a[12:15], s[2:3] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, a[8:11], s[2:3] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, a[4:7], s[2:3] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[2:3] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] blgp:2 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[2:3] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd: @@ -4331,11 +4491,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; 
GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -4358,7 +4518,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32 @@ -4372,13 +4534,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, ptr addrspace(1) %ptr) #0 { ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 +; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; SDAG-NEXT: s_movk_i32 s2, 0x41 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, s36 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 @@ -4387,6 +4547,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG-NEXT: v_mov_b32_e32 v5, s13 ; SDAG-NEXT: v_mov_b32_e32 v6, s14 ; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: 
v_accvgpr_write_b32 a0, s36 ; SDAG-NEXT: v_mov_b32_e32 v8, s16 ; SDAG-NEXT: v_mov_b32_e32 v9, s17 ; SDAG-NEXT: v_mov_b32_e32 v10, s18 @@ -4412,26 +4573,29 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG-NEXT: v_accvgpr_write_b32 a15, s51 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s2, -2 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v16, a[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, a[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, a[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[0:1] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; GISEL-NEXT: v_mov_b32_e32 v16, 0x41 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -4453,7 +4617,9 @@ define amdgpu_kernel void 
@test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, -2 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -4551,13 +4717,15 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -4577,36 +4745,34 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 ; GISEL-NEXT: v_mov_b32_e32 v16, s1 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: 
global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; 
GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) @@ -4699,13 +4865,16 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -4724,35 +4893,33 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[16:19], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], 
s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 25, 
i32 0, i32 42) @@ -4845,13 +5012,16 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a31, s23 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a31, s23 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -4870,35 +5040,33 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma ; GISEL-NEXT: v_accvgpr_write_b32 a18, s10 ; GISEL-NEXT: v_accvgpr_write_b32 a17, s9 ; GISEL-NEXT: v_accvgpr_write_b32 a16, s8 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[16:31] blgp:2 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[16:19], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; 
GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0) @@ -4991,13 +5159,16 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; 
GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -5016,35 +5187,33 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[16:19], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 
; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 25, i32 0, i32 42) @@ -5076,7 +5245,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_a( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5121,7 +5292,9 @@ define <16 
x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_b( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5166,7 +5339,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1(<8 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 0, 1 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5211,7 +5386,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 1, 0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5262,7 +5439,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp6( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5305,7 +5484,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp6( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], 
v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:2 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5355,7 +5536,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp8( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5398,7 +5581,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp8( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:2 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5448,7 +5633,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5491,7 +5677,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GISEL-NEXT: s_nop 3 +; 
GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5539,7 +5726,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5589,7 +5777,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp4( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:4 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5632,7 +5822,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp4( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:4 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5682,7 +5874,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp8( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:4 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -5725,7 +5919,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp8( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:4 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5774,7 +5970,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v6i32_fp4( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5823,7 +6021,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v6i32_fp4__v8i32_fp8( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5873,7 +6073,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5916,7 +6117,8 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5964,7 +6166,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll new file mode 100644 index 0000000000000..e1cebe28f7fe8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s + +; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane16.swap +; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane16.swap) + + +declare { i32, i32 } @llvm.amdgcn.permlane16.swap(i32, i32, 
i1 immarg, i1 immarg) + +define { i32, i32 } @v_permlane16_swap_b32_vv(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vi(i32 %vdst_old) { +; GCN-LABEL: v_permlane16_swap_b32_vi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 1, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vl(i32 %vdst_old) { +; GCN-LABEL: v_permlane16_swap_b32_vl: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 0xc1d1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 49617, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_iv(i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_iv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 1, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; 
GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_sv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vv_fi(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv_fi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 
%src0_old, i1 false, i1 true) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv_fi_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 true) + ret { i32, i32 } %v +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll new file mode 100644 index 0000000000000..121c379053fcf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s + +; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane32.swap +; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane32.swap) + + +declare { i32, i32 } @llvm.amdgcn.permlane32.swap(i32, i32, i1 immarg, i1 immarg) + +define { i32, i32 } @v_permlane32_swap_b32_vv(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, 
i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vi(i32 %vdst_old) { +; GCN-LABEL: v_permlane32_swap_b32_vi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 1, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vl(i32 %vdst_old) { +; GCN-LABEL: v_permlane32_swap_b32_vl: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 0xc1d1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 49617, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_iv(i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_iv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 1, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old) { +; GCN-LABEL: 
v_permlane32_swap_b32_sv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: s_nop 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vv_fi(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv_fi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e64 v0, v1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e64 v0, v1 bound_ctrl:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 true) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv_fi_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { 
i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 true) + ret { i32, i32 } %v +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll index 824d3708c027d..33dd2bd540ad0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -4,29 +4,15 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s -; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -passes='default' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s - ; GCN-LABEL: {{^}}fold_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( ; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32 ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64 ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: store i32 %tmp, ptr 
addrspace(1) %arg, align 4 -; OPT-NEXT: ret void define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) { + bb: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 store i32 %tmp, ptr addrspace(1) %arg, align 4 @@ -34,18 +20,12 @@ bb: } ; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( ; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}} ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}} ; GCN-NOT: cndmask ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT: %tmp2 = select i1 %tmp1, i32 2, i32 1 -; OPT: store i32 %tmp2, ptr addrspace(1) %arg -; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) { bb: @@ -57,13 +37,6 @@ bb: } ; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( - -; OPT: bb: -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT: bb3: -; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) { bb: diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 9681b01f334f9..d59bcfb16eece 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -1,4 +1,5 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s # GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read # GCN: 
V_MOV_B32 @@ -144,7 +145,8 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_agpr_mfma_read_overlap body: | @@ -164,7 +166,8 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap body: | @@ -174,8 +177,11 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -185,8 +191,11 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap body: | @@ -215,8 +224,11 @@ body: | ... 
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap body: | @@ -228,7 +240,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap body: | @@ -240,7 +253,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap body: | @@ -272,7 +286,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap body: | @@ -282,8 +297,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap body: | @@ -303,8 +322,12 @@ body: | ... 
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap body: | @@ -335,7 +358,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -347,7 +371,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -358,7 +383,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial body: | @@ -369,7 +395,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial body: | @@ -507,8 +534,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap body: | @@ -528,8 +559,12 @@ body: | ... 
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap body: | @@ -599,8 +634,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap body: | @@ -610,8 +649,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_SMFMAC name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap body: | @@ -621,8 +664,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_SMFMAC name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap body: | @@ -803,8 +851,12 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MOV_B32 name: dmfma16x16_write_vgpr_valu_read body: | @@ -867,8 +919,13 @@ body: | ... 
# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_DOT name: dmfma16x16_write_vgpr_dot_read body: | @@ -1303,8 +1360,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -1324,8 +1385,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 + # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap body: | @@ -1346,7 +1412,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap body: | @@ -1358,7 +1425,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap body: | @@ -1398,8 +1466,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap body: | @@ -1419,8 +1491,13 @@ body: | ... 
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap body: | @@ -1450,8 +1527,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap body: | @@ -1505,8 +1586,12 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_agpr_valu_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_ACCVGPR_READ_B32_e64 name: dmfma16x16_write_agpr_valu_read body: | @@ -1575,7 +1660,8 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap body: | @@ -1585,7 +1671,8 @@ body: | ... # GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap body: | @@ -1595,7 +1682,8 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap body: | @@ -1605,7 +1693,8 @@ body: | ... 
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap body: | @@ -1615,7 +1704,8 @@ body: | ... # GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: smfmac16x16_write_agpr_smfmac_read_overlap body: | @@ -1713,7 +1803,8 @@ body: | ... # GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap body: | @@ -1724,7 +1815,8 @@ body: | # GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap body: | @@ -1909,7 +2001,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap body: | @@ -1919,7 +2012,8 @@ body: | ... 
# GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: smfmac16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -2033,7 +2127,8 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2078,7 +2173,8 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2165,7 +2261,8 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc body: | @@ -2208,7 +2305,8 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc body: | @@ -2254,7 +2352,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc body: | @@ -2342,7 +2441,8 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2385,7 +2485,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 
7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2432,7 +2533,8 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC_ name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc body: | @@ -2446,7 +2548,8 @@ body: | ... # GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC_ name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | @@ -2460,7 +2563,8 @@ body: | # GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC_ name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | @@ -2474,7 +2578,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC_ name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir index d91ef7b42a4a3..f68b84c7140ba 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir @@ -15,6 +15,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 
$vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -36,6 +37,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 1, 1, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -57,6 +59,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 
$vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -78,6 +81,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 2, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -98,7 +102,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 
$vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 2, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 1 + ; GCN-NEXT: S_NOP 5 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 2, implicit $mode, implicit $exec @@ -118,7 +122,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 3, 3, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 1 + ; GCN-NEXT: S_NOP 5 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 
$vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 3, 3, implicit $mode, implicit $exec @@ -138,7 +142,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 4, 4, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 1 + ; GCN-NEXT: S_NOP 5 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 4, 4, implicit $mode, implicit $exec @@ -159,6 +163,8 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -180,6 +186,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept 
V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -201,6 +208,8 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -222,6 +231,7 @@ body: | ; GCN: liveins: $vgpr0, 
$vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -243,7 +253,8 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 3 + ; GCN-NEXT: S_NOP 7 + ; GCN-NEXT: S_NOP 2 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec @@ -264,7 +275,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 3 + ; GCN-NEXT: S_NOP 6 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef 
$sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll index 40b3b2c712272..234d4e035bf1d 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandle.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll @@ -3,7 +3,7 @@ ; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count ; CHECK-PRETTY: ---------- ------- ----------- ------- -------------- --------- -; CHECK-PRETTY: SRV f32 buf T0 t0 unbounded +; CHECK-PRETTY: SRV f32 buf T0 t7 unbounded ; CHECK-PRETTY: SRV byte r/o T1 t8,space1 1 ; CHECK-PRETTY: SRV struct r/o T2 t2,space4 1 ; CHECK-PRETTY: SRV u32 buf T3 t3,space5 24 @@ -18,44 +18,45 @@ define void @test_buffers() { ; RWBuffer Buf : register(u5, space3) %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0( - i32 3, i32 5, i32 1, i32 4, i1 false) - ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 4, i1 false) + i32 3, i32 5, i32 1, i32 0, i1 false) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 5, i1 false) ; CHECK-NOT: @llvm.dx.cast.handle ; RWBuffer Buf : register(u7, space2) %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0_1t( - i32 2, i32 7, i32 1, i32 6, i1 false) - ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 6, i1 false) + i32 2, i32 7, i32 1, i32 0, i1 false) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false) ; Buffer Buf[24] : register(t3, space5) ; Buffer typed2 = Buf[4] ; Note that the index below is 3 + 
4 = 7 %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t( - i32 5, i32 3, i32 24, i32 7, i1 false) + i32 5, i32 3, i32 24, i32 4, i1 false) ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 7, i1 false) ; struct S { float4 a; uint4 b; }; ; StructuredBuffer Buf : register(t2, space4) %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t( - i32 4, i32 2, i32 1, i32 10, i1 true) - ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 10, i1 true) + i32 4, i32 2, i32 1, i32 0, i1 true) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 true) ; ByteAddressBuffer Buf : register(t8, space1) %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( - i32 1, i32 8, i32 1, i32 12, i1 false) - ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 12, i1 false) + i32 1, i32 8, i32 1, i32 0, i1 false) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 8, i1 false) - ; Buffer Buf[] : register(t0) + ; Buffer Buf[] : register(t7) ; Buffer typed3 = Buf[ix] %typed3_ix = call i32 @some_val() %typed3 = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0t( - i32 0, i32 0, i32 -1, i32 %typed3_ix, i1 false) - ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %typed3_ix, i1 false) + i32 0, i32 7, i32 -1, i32 %typed3_ix, i1 false) + ; CHECK: %[[IX:.*]] = add i32 %typed3_ix, 7 + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %[[IX]], i1 false) ret void } diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll index bce324509184b..aa143dfa8211d 100644 --- 
a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll @@ -3,7 +3,7 @@ ; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count ; CHECK-PRETTY: ---------- ------- ----------- ------- -------------- --------- -; CHECK-PRETTY: SRV f32 buf T0 t0 unbounded +; CHECK-PRETTY: SRV f32 buf T0 t7 unbounded ; CHECK-PRETTY: SRV byte r/o T1 t8,space1 1 ; CHECK-PRETTY: SRV struct r/o T2 t2,space4 1 ; CHECK-PRETTY: SRV u32 buf T3 t3,space5 24 @@ -18,15 +18,15 @@ define void @test_bindings() { ; RWBuffer Buf : register(u5, space3) %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0( - i32 3, i32 5, i32 1, i32 4, i1 false) - ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 4, i1 false) + i32 3, i32 5, i32 1, i32 0, i1 false) + ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 5, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 }) ; RWBuffer Buf : register(u7, space2) %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0_0t( - i32 2, i32 7, i32 1, i32 6, i1 false) - ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 6, i1 false) + i32 2, i32 7, i32 1, i32 0, i1 false) + ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 7, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 }) ; Buffer Buf[24] : register(t3, space5) @@ -34,7 +34,7 @@ define void @test_bindings() { ; Note 
that the index below is 3 + 4 = 7 %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t( - i32 5, i32 3, i32 24, i32 7, i1 false) + i32 5, i32 3, i32 24, i32 4, i1 false) ; CHECK: [[BUF2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 }) @@ -42,24 +42,25 @@ define void @test_bindings() { ; StructuredBuffer Buf : register(t2, space4) %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t( - i32 4, i32 2, i32 1, i32 10, i1 true) - ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 10, i1 true) + i32 4, i32 2, i32 1, i32 0, i1 true) + ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 2, i1 true) ; CHECK: = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF3]], %dx.types.ResourceProperties { i32 1036, i32 32 }) ; ByteAddressBuffer Buf : register(t8, space1) %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0) @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( - i32 1, i32 8, i32 1, i32 12, i1 false) - ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 12, i1 false) + i32 1, i32 8, i32 1, i32 0, i1 false) + ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 8, i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF4]], %dx.types.ResourceProperties { i32 11, i32 0 }) - ; Buffer Buf[] : register(t0) + ; 
Buffer Buf[] : register(t7) ; Buffer typed3 = Buf[ix] %typed3_ix = call i32 @some_val() %typed3 = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0t( - i32 0, i32 0, i32 -1, i32 %typed3_ix, i1 false) - ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 -1, i32 0, i8 0 }, i32 %typed3_ix, i1 false) + i32 0, i32 7, i32 -1, i32 %typed3_ix, i1 false) + ; CHECK: %[[IX:.*]] = add i32 %typed3_ix, 7 + ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 -1, i32 0, i8 0 }, i32 %[[IX]], i1 false) ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 10, i32 1033 }) ret void diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions-obj-test.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions-obj-test.ll new file mode 100644 index 0000000000000..02a4c2090499a --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions-obj-test.ll @@ -0,0 +1,16 @@ +; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s + +target triple = "dxil-pc-shadermodel6.7-library" +define double @div(double %a, double %b) #0 { + %res = fdiv double %a, %b + ret double %res +} + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} + +; CHECK: - Name: SFI0 +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Flags: +; CHECK: Doubles: true +; CHECK: DX11_1_DoubleExtensions: true + diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll index a8d5f9c78f0b4..6332ef806a0d8 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll @@ -1,27 +1,45 @@ ; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s -; RUN: llc %s --filetype=obj -o - | obj2yaml | 
FileCheck %s --check-prefix=DXC target triple = "dxil-pc-shadermodel6.7-library" -; CHECK: ; Shader Flags Value: 0x00000044 +; CHECK: ; Combined Shader Flags for Module +; CHECK-NEXT: ; Shader Flags Value: 0x00000044 + ; CHECK: ; Note: shader requires additional functionality: ; CHECK-NEXT: ; Double-precision floating point ; CHECK-NEXT: ; Double-precision extensions for 11.1 ; CHECK-NEXT: ; Note: extra DXIL module flags: -; CHECK-NEXT: {{^;$}} -define double @div(double %a, double %b) #0 { +; CHECK-NEXT: ; +; CHECK-NEXT: ; Shader Flags for Module Functions + +; CHECK: ; Function test_fdiv_double : 0x00000044 +define double @test_fdiv_double(double %a, double %b) #0 { %res = fdiv double %a, %b ret double %res } -attributes #0 = { convergent norecurse nounwind "hlsl.export"} +; CHECK: ; Function test_uitofp_i64 : 0x00000044 +define double @test_uitofp_i64(i64 %a) #0 { + %r = uitofp i64 %a to double + ret double %r +} + +; CHECK: ; Function test_sitofp_i64 : 0x00000044 +define double @test_sitofp_i64(i64 %a) #0 { + %r = sitofp i64 %a to double + ret double %r +} -; DXC: - Name: SFI0 -; DXC-NEXT: Size: 8 -; DXC-NEXT: Flags: -; DXC-NEXT: Doubles: true -; DXC-NOT: {{[A-Za-z]+: +true}} -; DXC: DX11_1_DoubleExtensions: true -; DXC-NOT: {{[A-Za-z]+: +true}} -; DXC: NextUnusedBit: false -; DXC: ... 
+; CHECK: ; Function test_fptoui_i32 : 0x00000044 +define i32 @test_fptoui_i32(double %a) #0 { + %r = fptoui double %a to i32 + ret i32 %r +} + +; CHECK: ; Function test_fptosi_i64 : 0x00000044 +define i64 @test_fptosi_i64(double %a) #0 { + %r = fptosi double %a to i64 + ret i64 %r +} + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll index e9b44240e10b9..1c131f0774938 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll @@ -3,11 +3,15 @@ target triple = "dxil-pc-shadermodel6.7-library" -; CHECK: ; Shader Flags Value: 0x00000004 -; CHECK: ; Note: shader requires additional functionality: -; CHECK-NEXT: ; Double-precision floating point -; CHECK-NEXT: ; Note: extra DXIL module flags: -; CHECK-NEXT: {{^;$}} +;CHECK: ; Combined Shader Flags for Module +;CHECK-NEXT: ; Shader Flags Value: 0x00000004 +;CHECK-NEXT: ; +;CHECK-NEXT: ; Note: shader requires additional functionality: +;CHECK-NEXT: ; Double-precision floating point +;CHECK-NEXT: ; Note: extra DXIL module flags: +;CHECK-NEXT: ; +;CHECK-NEXT: ; Shader Flags for Module Functions +;CHECK-NEXT: ; Function add : 0x00000004 define double @add(double %a, double %b) #0 { %sum = fadd double %a, %b diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/no_flags.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/no_flags.ll index f7baa1b64f9cd..f99d4fca84da2 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/no_flags.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/no_flags.ll @@ -2,7 +2,12 @@ target triple = "dxil-pc-shadermodel6.7-library" -; CHECK: ; Shader Flags Value: 0x00000000 +;CHECK: ; Combined Shader Flags for Module +;CHECK-NEXT: ; Shader Flags Value: 0x00000000 +;CHECK-NEXT: ; +;CHECK-NEXT: ; Shader Flags for Module Functions +;CHECK-NEXT: ; Function add : 0x00000000 + define i32 @add(i32 %a, i32 %b) { %sum = add i32 %a, %b ret i32 
%sum diff --git a/llvm/test/CodeGen/DirectX/updateCounter.ll b/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll similarity index 86% rename from llvm/test/CodeGen/DirectX/updateCounter.ll rename to llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll index 6bfb4d8670f55..3f2610649cba1 100644 --- a/llvm/test/CodeGen/DirectX/updateCounter.ll +++ b/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll @@ -12,7 +12,7 @@ define void @update_counter_decrement_vector() { ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1) - %1 = call i32 @llvm.dx.updateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 -1) + %1 = call i32 @llvm.dx.bufferUpdateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 -1) ret void } @@ -24,7 +24,7 @@ define void @update_counter_increment_vector() { i32 0, i32 0, i32 1, i32 0, i1 false) ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1) - %1 = call i32 @llvm.dx.updateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 1) + %1 = call i32 @llvm.dx.bufferUpdateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 1) ret void } @@ -36,6 +36,6 @@ define void @update_counter_decrement_scalar() { i32 1, i32 8, i32 1, i32 0, i1 false) ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]] ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1) - %1 = call i32 @llvm.dx.updateCounter(target("dx.RawBuffer", i8, 0, 0) %buffer, i8 -1) + %1 = call i32 @llvm.dx.bufferUpdateCounter(target("dx.RawBuffer", i8, 0, 0) %buffer, i8 -1) ret void } diff --git 
a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll index 2097523a61c5f..1a8c9ede8f8b7 100644 --- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll +++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll @@ -2,12 +2,21 @@ ; REQUIRES: x86-registered-target ; COM: Machine function splitting with FDO profiles -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86,MFS-NOBBSECTIONS ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86 ; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86 +; COM: Machine function splitting along with -basic-block-sections profile +; RUN: echo 'v1' > %t +; RUN: echo 'ffoo21' >> %t +; RUN: echo 'c0' >> %t +; RUN: echo 'ffoo22' >> %t +; RUN: echo 'c0 1' >> %t +; RUN: echo 'c2' >> %t +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -basic-block-sections=%t -split-machine-functions | FileCheck %s --check-prefixes=MFS-BBSECTIONS + ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 
-enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 @@ -610,6 +619,61 @@ cold_asm_target: ret void } +define void @foo21(i1 zeroext %0) { +;; Check that a function with basic-block-sections profile (but no pgo profile) +;; is properly split when the profile is used along with mfs. +; MFS-BBSECTIONS: .section .text.hot.foo21 +; MFS-NOBBSECTIONS-NOT: .section .text.hot.foo21 +; MFS-BBSECTIONS-LABEL: foo21: +; MFS-NOBBSECTIONS-NOT: foo21.cold: +; MFS-BBSECTIONS: .section .text.split.foo21 +; MFS-BBSECTIONS: foo21.cold + %2 = alloca i8, align 1 + %3 = zext i1 %0 to i8 + store i8 %3, ptr %2, align 1 + %4 = load i8, ptr %2, align 1 + %5 = trunc i8 %4 to i1 + br i1 %5, label %6, label %8 + +6: ; preds = %1 + %7 = call i32 @bar() + br label %10 + +8: ; preds = %1 + %9 = call i32 @baz() + br label %10 + +10: ; preds = %8, %6 + ret void +} + +define void @foo22(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { +;; Check that when a function has both basic-block-section and pgo profiles +;; only the basic-block-section profile is used for splitting. + +;; Check that we create two hot sections with -basic-block-sections. 
+; MFS-BBSECTIONS: .section .text.hot.foo22 +; MFS-BBSECTIONS-LABEL: foo22: +; MFS-BBSECTIONS: callq bar +; MFS-BBSECTIONS: .section .text.hot.foo22 +; MFS-BBSECTIONS-NEXT: foo22.__part.1: +; MFS-BBSECTIONS: callq baz +; MFS-BBSECTIONS-NOT: .section .text.split.foo22 + br i1 %0, label %2, label %4, !prof !17 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() diff --git a/llvm/test/CodeGen/Hexagon/widen-not-load.ll b/llvm/test/CodeGen/Hexagon/widen-not-load.ll index 6206a0a5367e4..5bf8b57054a91 100644 --- a/llvm/test/CodeGen/Hexagon/widen-not-load.ll +++ b/llvm/test/CodeGen/Hexagon/widen-not-load.ll @@ -1,6 +1,9 @@ ; Test that double word post increment load is not generated. +; REQUIRES: asserts -; RUN: llc -march=hexagon -O2 -debug-only=hexagon-load-store-widening %s -o 2>&1 - | FileCheck %s +; REQUIRES: asserts +; RUN: llc -march=hexagon -O2 -debug-only=hexagon-load-store-widening \ +; RUN: %s -o 2>&1 - | FileCheck %s ; Loads with positive invalid postinc is not widened define ptr @test1() { diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll new file mode 100644 index 0000000000000..2f7c93eb1c0de --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll @@ -0,0 +1,1323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32d | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64d | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 
-global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s + +; These tests are each targeted at a particular RISC-V FPU instruction. +; Compares and conversions can be found in double-fcmp.ll and double-convert.ll +; respectively. Some other double-*.ll files in this folder exercise LLVM IR +; instructions that don't directly match a RISC-V instruction. + +define double @fadd_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fadd_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fadd.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fadd_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fadd_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd double %a, %b + ret double %1 +} + +define double @fsub_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fsub_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsub.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsub_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsub_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fsub double %a, %b + ret double %1 +} + +define double @fmul_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmul_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmul.d fa0, fa0, fa1 +; 
CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmul_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmul_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul double %a, %b + ret double %1 +} + +define double @fdiv_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fdiv_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fdiv.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fdiv_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __divdf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fdiv_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __divdf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fdiv double %a, %b + ret double %1 +} + +declare double @llvm.sqrt.f64(double) + +define double @fsqrt_d(double %a) nounwind { +; CHECKIFD-LABEL: fsqrt_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsqrt.d fa0, fa0 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsqrt_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsqrt_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 
16 +; RV64I-NEXT: ret + %1 = call double @llvm.sqrt.f64(double %a) + ret double %1 +} + +declare double @llvm.copysign.f64(double, double) + +define double @fsgnj_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fsgnj_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsgnj_d: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a4, a2, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnj_d: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: slli a3, a2, 63 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = call double @llvm.copysign.f64(double %a, double %b) + ret double %1 +} + +define double @fsgnjn_d(double %a, double %b) nounwind { +; TODO: fsgnjn.s isn't selected on RV64 because DAGCombiner::visitBITCAST will +; convert (bitconvert (fneg x)) to a xor. +; +; CHECKIFD-LABEL: fsgnjn_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsgnjn.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsgnjn_d: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a3, a3, a2 +; RV32I-NEXT: addi a4, a2, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjn_d: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: slli a3, a2, 63 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: xor a1, a1, a3 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = fneg double %b + %2 = call double @llvm.copysign.f64(double %a, double %1) + ret double %2 +} + +declare double @llvm.fabs.f64(double) + +; This function performs extra work to ensure that +; DAGCombiner::visitBITCAST doesn't replace the fabs with an and. 
+define double @fabs_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fabs_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fadd.d fa5, fa0, fa1 +; CHECKIFD-NEXT: fabs.d fa4, fa5 +; CHECKIFD-NEXT: fadd.d fa0, fa4, fa5 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fabs_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd double %a, %b + %2 = call double @llvm.fabs.f64(double %1) + %3 = fadd double %2, %1 + ret double %3 +} + +declare double @llvm.minnum.f64(double, double) + +define double @fmin_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmin_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmin_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmin +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmin_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmin +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.minnum.f64(double %a, double %b) + ret double %1 +} + +declare 
double @llvm.maxnum.f64(double, double) + +define double @fmax_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmax_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmax_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmax +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmax_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmax +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.maxnum.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.fma.f64(double, double, double) + +define double @fmadd_d(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fmadd_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmadd_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + ret double %1 +} + +define double @fmsub_d(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fmsub_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fmsub.d fa0, fa0, fa1, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; 
RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fmsub_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fmsub.d fa0, fa0, fa1, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmsub_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: lui a1, %hi(.LCPI11_0) +; RV32I-NEXT: addi a1, a1, %lo(.LCPI11_0) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lui a5, 524288 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded 
Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd double 0.0, %c ; avoid negation using xor + %negc = fneg double %c_ + %1 = call double @llvm.fma.f64(double %a, double %b, double %negc) + ret double %1 +} + +define double @fnmadd_d(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmadd_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fnmadd.d fa0, fa4, fa1, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmadd_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fnmadd.d fa0, fa4, fa1, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI12_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI12_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; 
RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lui a5, 524288 +; RV32I-NEXT: xor a2, s7, a5 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: mv a0, s6 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI12_0) +; RV64I-NEXT: ld s1, %lo(.LCPI12_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a2, a1, 63 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a + %c_ = fadd double 0.0, %c + %nega = fneg double %a_ + %negc = fneg 
double %c_ + %1 = call double @llvm.fma.f64(double %nega, double %b, double %negc) + ret double %1 +} + +define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmadd_d_2: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa1, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fnmadd.d fa0, fa4, fa0, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmadd_d_2: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa1, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fnmadd.d fa0, fa4, fa0, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI13_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI13_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lui a5, 524288 +; RV32I-NEXT: xor a3, s7, a5 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: mv a0, s0 +; 
RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s6 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld s1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a2, a1, 63 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %b_ = fadd double 0.0, %b + %c_ = fadd double 0.0, %c + %negb = fneg double %b_ + %negc = fneg double %c_ + %1 = call double @llvm.fma.f64(double %a, double %negb, double %negc) + ret double %1 +} + +define double @fnmadd_d_3(double %a, double %b, double %c) nounwind { +; 
CHECKIFD-LABEL: fnmadd_d_3: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa5, fa0, fa1, fa2 +; CHECKIFD-NEXT: fneg.d fa0, fa5 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d_3: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d_3: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + %neg = fneg double %1 + ret double %neg +} + + +define double @fnmadd_nsz(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fnmadd_nsz: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa5, fa0, fa1, fa2 +; CHECKIFD-NEXT: fneg.d fa0, fa5 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_nsz: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_nsz: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call nsz double @llvm.fma.f64(double %a, double %b, double %c) + %neg = fneg nsz double %1 + ret double %neg +} + +define double @fnmsub_d(double %a, double %b, double %c) 
nounwind { +; RV32IFD-LABEL: fnmsub_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa0, fa5 +; RV32IFD-NEXT: fnmsub.d fa0, fa5, fa1, fa2 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmsub_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa0, fa5 +; RV64IFD-NEXT: fnmsub.d fa0, fa5, fa1, fa2 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmsub_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI16_0) +; RV32I-NEXT: addi a3, a2, %lo(.LCPI16_0) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: lw a3, 4(a3) +; RV32I-NEXT: mv s3, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a4, s2 +; RV32I-NEXT: mv a5, s3 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI16_0) +; RV64I-NEXT: ld a1, %lo(.LCPI16_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call 
__adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a + %nega = fneg double %a_ + %1 = call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %1 +} + +define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmsub_d_2: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV32IFD-NEXT: fnmsub.d fa0, fa5, fa0, fa2 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmsub_d_2: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV64IFD-NEXT: fnmsub.d fa0, fa5, fa0, fa2 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmsub_d_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI17_0) +; RV32I-NEXT: addi a3, a2, %lo(.LCPI17_0) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: lw a3, 4(a3) +; RV32I-NEXT: mv s3, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lui a3, 524288 +; RV32I-NEXT: xor a3, a1, a3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a4, s2 +; RV32I-NEXT: mv a5, s3 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 28(sp) # 
4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_d_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI17_0) +; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a1, a0, a1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %b_ = fadd double 0.0, %b + %negb = fneg double %b_ + %1 = call double @llvm.fma.f64(double %a, double %negb, double %c) + ret double %1 +} + +define double @fmadd_d_contract(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fmadd_d_contract: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmadd_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a4 +; RV32I-NEXT: mv s1, a5 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; 
RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul contract double %a, %b + %2 = fadd contract double %1, %c + ret double %2 +} + +define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fmsub_d_contract: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fmul.d fa4, fa0, fa1 +; RV32IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fmsub_d_contract: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fmul.d fa4, fa0, fa1 +; RV64IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmsub_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: lui a1, %hi(.LCPI19_0) +; RV32I-NEXT: addi a1, a1, %lo(.LCPI19_0) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s4, 
a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI19_0) +; RV64I-NEXT: ld a1, %lo(.LCPI19_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd double 0.0, %c ; avoid negation using xor + %1 = fmul contract double %a, %b + %2 = fsub contract double %1, %c_ + ret double %2 +} + +define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmadd_d_contract: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 +; 
RV32IFD-NEXT: fadd.d fa3, fa1, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fmul.d fa4, fa4, fa3 +; RV32IFD-NEXT: fneg.d fa4, fa4 +; RV32IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmadd_d_contract: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV64IFD-NEXT: fadd.d fa3, fa1, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fmul.d fa4, fa4, fa3 +; RV64IFD-NEXT: fneg.d fa4, fa4 +; RV64IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI20_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI20_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a0, s6 +; RV32I-NEXT: mv a1, s7 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: lui a2, 
524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI20_0) +; RV64I-NEXT: ld s1, %lo(.LCPI20_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a ; avoid negation using xor + %b_ = fadd double 
0.0, %b ; avoid negation using xor + %c_ = fadd double 0.0, %c ; avoid negation using xor + %1 = fmul contract double %a_, %b_ + %2 = fneg double %1 + %3 = fsub contract double %2, %c_ + ret double %3 +} + +define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmsub_d_contract: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV32IFD-NEXT: fnmsub.d fa0, fa4, fa5, fa2 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmsub_d_contract: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV64IFD-NEXT: fnmsub.d fa0, fa4, fa5, fa2 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmsub_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI21_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI21_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv 
a0, s6 +; RV32I-NEXT: mv a1, s7 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI21_0) +; RV64I-NEXT: ld s1, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a ; avoid negation using xor + %b_ = fadd double 0.0, %b ; avoid negation using xor + %1 = fmul contract double %a_, %b_ + %2 = fsub contract double %c, %1 + ret double 
%2 +} + +define double @fsgnjx_f64(double %x, double %y) nounwind { +; RV32IFD-LABEL: fsgnjx_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lui a0, 261888 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw a0, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fsgnj.d fa5, fa5, fa0 +; RV32IFD-NEXT: fmul.d fa0, fa5, fa1 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fsgnjx_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: li a0, 1023 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 +; RV64IFD-NEXT: fsgnj.d fa5, fa5, fa0 +; RV64IFD-NEXT: fmul.d fa0, fa5, fa1 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fsgnjx_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a0, 524288 +; RV32I-NEXT: lui a4, 261888 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: or a1, a0, a4 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjx_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: li a3, 1023 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: slli a3, a3, 52 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call double @llvm.copysign.f64(double 1.0, double %x) + %mul = fmul double %z, %y + ret double %mul +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll index a4f92640697bc..7133d5c100e75 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll @@ -43,23 +43,21 @@ define i32 @fcvt_wu_d(double %a) nounwind { define i32 @fcvt_wu_d_multiple_use(double 
%x, ptr %y) nounwind { ; RV32IFD-LABEL: fcvt_wu_d_multiple_use: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: fcvt.wu.d a1, fa0, rtz -; RV32IFD-NEXT: li a0, 1 -; RV32IFD-NEXT: beqz a1, .LBB4_2 +; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz +; RV32IFD-NEXT: bnez a0, .LBB4_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: mv a0, a1 +; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB4_2: ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_wu_d_multiple_use: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fcvt.wu.d a1, fa0, rtz -; RV64IFD-NEXT: slli a0, a1, 32 -; RV64IFD-NEXT: srli a2, a0, 32 -; RV64IFD-NEXT: li a0, 1 -; RV64IFD-NEXT: beqz a2, .LBB4_2 +; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz +; RV64IFD-NEXT: slli a1, a0, 32 +; RV64IFD-NEXT: srli a1, a1, 32 +; RV64IFD-NEXT: bnez a1, .LBB4_2 ; RV64IFD-NEXT: # %bb.1: -; RV64IFD-NEXT: mv a0, a1 +; RV64IFD-NEXT: li a0, 1 ; RV64IFD-NEXT: .LBB4_2: ; RV64IFD-NEXT: ret %a = fptoui double %x to i32 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll index ad461f8f24b91..2b67d5c7ac570 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll @@ -5,6 +5,10 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \ ; RUN: -verify-machineinstrs -target-abi=lp64d \ ; RUN: | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV32I %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV64I %s declare double @llvm.sqrt.f64(double) @@ -13,6 +17,24 @@ define double @sqrt_f64(double %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fsqrt.d fa0, fa0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: sqrt_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrt +; 
RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sqrt_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.sqrt.f64(double %a) ret double %1 } @@ -24,6 +46,24 @@ define double @fma_f64(double %a, double %b, double %c) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fma_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fma_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %1 } @@ -35,6 +75,38 @@ define double @fmuladd_f64(double %a, double %b, double %c) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmuladd_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a4 +; RV32I-NEXT: mv s1, a5 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmuladd_f64: +; RV64I: # %bb.0: 
+; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) ret double %1 } @@ -46,6 +118,20 @@ define double @fabs_f64(double %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fabs.d fa0, fa0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fabs_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret %1 = call double @llvm.fabs.f64(double %a) ret double %1 } @@ -57,6 +143,24 @@ define double @minnum_f64(double %a, double %b) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: minnum_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmin +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: minnum_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmin +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.minnum.f64(double %a, double %b) ret double %1 } @@ -68,6 +172,24 @@ define double @maxnum_f64(double %a, double %b) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: maxnum_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmax +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: maxnum_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmax +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.maxnum.f64(double %a, double %b) ret double %1 } @@ -79,6 +201,25 @@ define double @copysign_f64(double %a, double %b) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: copysign_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a4, a2, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: copysign_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: slli a3, a2, 63 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret %1 = call double @llvm.copysign.f64(double %a, double %b) ret double %1 } @@ -103,6 +244,24 @@ define double @floor_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: floor_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call floor +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: floor_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call floor +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.floor.f64(double %a) ret double %1 } @@ -127,6 +286,24 @@ define double @ceil_f64(double %a) 
nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: ceil_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ceil +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ceil_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ceil +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.ceil.f64(double %a) ret double %1 } @@ -151,6 +328,24 @@ define double @trunc_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: trunc_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call trunc +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: trunc_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call trunc +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.trunc.f64(double %a) ret double %1 } @@ -175,6 +370,24 @@ define double @rint_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: rint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call rint +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64I-NEXT: call rint +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.rint.f64(double %a) ret double %1 } @@ -199,6 +412,24 @@ define double @nearbyint_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: nearbyint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call nearbyint +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: nearbyint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call nearbyint +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.nearbyint.f64(double %a) ret double %1 } @@ -223,6 +454,24 @@ define double @round_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: round_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call round +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: round_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call round +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.round.f64(double %a) ret double %1 } @@ -247,6 +496,24 @@ define double @roundeven_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: roundeven_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 
4-byte Folded Spill +; RV32I-NEXT: call roundeven +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: roundeven_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundeven +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.roundeven.f64(double %a) ret double %1 } @@ -259,6 +526,30 @@ define i1 @isnan_d_fpclass(double %x) { ; CHECKIFD-NEXT: andi a0, a0, 768 ; CHECKIFD-NEXT: snez a0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: isnan_d_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: lui a2, 524032 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: beq a1, a2, .LBB14_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a0, a2, a1 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB14_2: +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnan_d_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: li a2, 2047 +; RV64I-NEXT: srli a1, a1, 1 +; RV64I-NEXT: slli a2, a2, 52 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: sltu a0, a2, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan ret i1 %1 } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll new file mode 100644 index 0000000000000..7fe4d2ef797af --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll @@ -0,0 +1,1099 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=CHECKIF %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+f -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s 
\ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s + +; These tests are each targeted at a particular RISC-V FPU instruction. +; Compares and conversions can be found in float-fcmp.ll and float-convert.ll +; respectively. Some other float-*.ll files in this folder exercise LLVM IR +; instructions that don't directly match a RISC-V instruction. + +define float @fadd_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fadd_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fadd.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fadd_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + ret float %1 +} + +define float @fsub_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fsub_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fsub.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsub_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fsub float %a, %b + ret float %1 +} + +define float @fmul_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fmul_s: +; CHECKIF: # %bb.0: 
+; CHECKIF-NEXT: fmul.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmul_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmul_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul float %a, %b + ret float %1 +} + +define float @fdiv_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fdiv_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fdiv.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fdiv_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __divsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fdiv_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __divsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fdiv float %a, %b + ret float %1 +} + +declare float @llvm.sqrt.f32(float) + +define float @fsqrt_s(float %a) nounwind { +; CHECKIF-LABEL: fsqrt_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fsqrt.s fa0, fa0 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsqrt_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrtf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsqrt_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrtf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +declare float @llvm.copysign.f32(float, float) + +define float @fsgnj_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fsgnj_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fsgnj.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsgnj_s: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnj_s: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a3, a2, -1 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = call float @llvm.copysign.f32(float %a, float %b) + ret float %1 +} + +define float @fsgnjn_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fsgnjn_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fadd.s fa5, fa0, fa1 +; CHECKIF-NEXT: fsgnjn.s fa0, fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsgnjn_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: and a2, s0, a2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjn_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: addiw a2, a1, -1 +; RV64I-NEXT: and a2, s0, a2 +; RV64I-NEXT: and a0, a0, a1 +; 
RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + %2 = fneg float %1 + %3 = call float @llvm.copysign.f32(float %a, float %2) + ret float %3 +} + +declare float @llvm.fabs.f32(float) + +define float @fabs_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fabs_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fadd.s fa5, fa0, fa1 +; CHECKIF-NEXT: fabs.s fa4, fa5 +; CHECKIF-NEXT: fadd.s fa0, fa4, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fabs_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lui a0, 524288 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lui a0, 524288 +; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + %2 = call float @llvm.fabs.f32(float %1) + %3 = fadd float %2, %1 + ret float %3 +} + +declare float @llvm.minnum.f32(float, float) + +define float @fmin_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fmin_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmin.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmin_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fminf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmin_s: +; RV64I: # 
%bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fminf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.minnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.maxnum.f32(float, float) + +define float @fmax_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fmax_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmax.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmax_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaxf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmax_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaxf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.maxnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmadd_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmadd_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fmsub_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmsub_s: 
+; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fmsub.s fa0, fa0, fa1, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmsub_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: lui a0, %hi(.LCPI11_0) +; RV32I-NEXT: lw a1, %lo(.LCPI11_0)(a0) +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a2, a0, a2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI11_0) +; RV64I-NEXT: lw a1, %lo(.LCPI11_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %negc = fneg float %c_ + %1 = call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmadd_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: 
fadd.s fa4, fa0, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa1, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI12_0) +; RV32I-NEXT: lw s1, %lo(.LCPI12_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, s3, a2 +; RV32I-NEXT: xor a2, a0, a2 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI12_0) +; RV64I-NEXT: lw s1, %lo(.LCPI12_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: 
call fmaf +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a + %c_ = fadd float 0.0, %c + %nega = fneg float %a_ + %negc = fneg float %c_ + %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmadd_s_2: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa1, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI13_0) +; RV32I-NEXT: lw s1, %lo(.LCPI13_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, s3, a2 +; RV32I-NEXT: xor a2, a0, a2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; 
RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: lw s1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %b_ = fadd float 0.0, %b + %c_ = fadd float 0.0, %c + %negb = fneg float %b_ + %negc = fneg float %c_ + %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc) + ret float %1 +} + +define float @fnmadd_s_3(float %a, float %b, float %c) nounwind { +; RV32IF-LABEL: fnmadd_s_3: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmadd.s ft0, fa0, fa1, fa2 +; RV32IF-NEXT: fneg.s fa0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fnmadd_s_3: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmadd.s ft0, fa0, fa1, fa2 +; RV64IF-NEXT: fneg.s fa0, ft0 +; RV64IF-NEXT: ret +; +; CHECKIF-LABEL: fnmadd_s_3: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2 +; CHECKIF-NEXT: fneg.s fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s_3: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: 
addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s_3: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg float %1 + ret float %neg +} + +define float @fnmadd_nsz(float %a, float %b, float %c) nounwind { +; RV32IF-LABEL: fnmadd_nsz: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fnmadd_nsz: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2 +; RV64IF-NEXT: ret +; +; CHECKIF-LABEL: fnmadd_nsz: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2 +; CHECKIF-NEXT: fneg.s fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_nsz: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_nsz: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg nsz float %1 + ret float %neg +} + +define float @fnmsub_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmsub_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa0, fa5 +; CHECKIF-NEXT: fnmsub.s fa0, fa5, fa1, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmsub_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; 
RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI16_0) +; RV32I-NEXT: lw a1, %lo(.LCPI16_0)(a1) +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI16_0) +; RV64I-NEXT: lw a1, %lo(.LCPI16_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a + %nega = fneg float %a_ + %1 = call float @llvm.fma.f32(float %nega, float %b, float %c) + ret float %1 +} + +define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmsub_s_2: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa1, fa5 +; CHECKIF-NEXT: fnmsub.s fa0, fa5, fa0, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmsub_s_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI17_0) +; RV32I-NEXT: lw a1, %lo(.LCPI17_0)(a1) +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a1, a0, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_s_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI17_0) +; RV64I-NEXT: lw a1, %lo(.LCPI17_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a1, a0, a1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %b_ = fadd float 0.0, %b + %negb = fneg float %b_ + %1 = call float @llvm.fma.f32(float %a, float %negb, float %c) + ret float %1 +} + +define float @fmadd_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmadd_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmadd_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 
4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul contract float %a, %b + %2 = fadd contract float %1, %c + ret float %2 +} + +define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmsub_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fmul.s fa4, fa0, fa1 +; CHECKIF-NEXT: fsub.s fa0, fa4, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmsub_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: lui a0, %hi(.LCPI19_0) +; RV32I-NEXT: lw a1, %lo(.LCPI19_0)(a0) +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd 
s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI19_0) +; RV64I-NEXT: lw a1, %lo(.LCPI19_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %1 = fmul contract float %a, %b + %2 = fsub contract float %1, %c_ + ret float %2 +} + +define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmadd_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa0, fa5 +; CHECKIF-NEXT: fadd.s fa3, fa1, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fmul.s fa4, fa4, fa3 +; CHECKIF-NEXT: fneg.s fa4, fa4 +; CHECKIF-NEXT: fsub.s fa0, fa4, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI20_0) +; RV32I-NEXT: lw s1, %lo(.LCPI20_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s1, a0 +; 
RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI20_0) +; RV64I-NEXT: lw s1, %lo(.LCPI20_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a ; avoid negation using xor + %b_ = fadd float 0.0, %b ; avoid negation using xor + %c_ = fadd float 0.0, %c ; avoid negation using xor + %1 = fmul contract float %a_, %b_ + %2 = fneg float %1 + %3 = fsub 
contract float %2, %c_ + ret float %3 +} + +define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmsub_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa0, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa1, fa5 +; CHECKIF-NEXT: fnmsub.s fa0, fa4, fa5, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmsub_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI21_0) +; RV32I-NEXT: lw s1, %lo(.LCPI21_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI21_0) +; RV64I-NEXT: lw s1, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, 
a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a ; avoid negation using xor + %b_ = fadd float 0.0, %b ; avoid negation using xor + %1 = fmul contract float %a_, %b_ + %2 = fsub contract float %c, %1 + ret float %2 +} + +define float @fsgnjx_f32(float %x, float %y) nounwind { +; CHECKIF-LABEL: fsgnjx_f32: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: lui a0, 260096 +; CHECKIF-NEXT: fmv.w.x fa5, a0 +; CHECKIF-NEXT: fsgnj.s fa5, fa5, fa0 +; CHECKIF-NEXT: fmul.s fa0, fa5, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsgnjx_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: lui a3, 260096 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjx_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: lui a3, 260096 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call float @llvm.copysign.f32(float 1.0, float %x) + %mul = fmul float %z, %y + ret float %mul +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll 
b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll index 7e96d529af36f..e6df28f5f28d1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll @@ -27,23 +27,21 @@ define i32 @fcvt_wu_s(float %a) nounwind { define i32 @fcvt_wu_s_multiple_use(float %x, ptr %y) nounwind { ; RV32IF-LABEL: fcvt_wu_s_multiple_use: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fcvt.wu.s a1, fa0, rtz -; RV32IF-NEXT: li a0, 1 -; RV32IF-NEXT: beqz a1, .LBB2_2 +; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz +; RV32IF-NEXT: bnez a0, .LBB2_2 ; RV32IF-NEXT: # %bb.1: -; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB2_2: ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_wu_s_multiple_use: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fcvt.wu.s a1, fa0, rtz -; RV64IF-NEXT: slli a0, a1, 32 -; RV64IF-NEXT: srli a2, a0, 32 -; RV64IF-NEXT: li a0, 1 -; RV64IF-NEXT: beqz a2, .LBB2_2 +; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz +; RV64IF-NEXT: slli a1, a0, 32 +; RV64IF-NEXT: srli a1, a1, 32 +; RV64IF-NEXT: bnez a1, .LBB2_2 ; RV64IF-NEXT: # %bb.1: -; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: li a0, 1 ; RV64IF-NEXT: .LBB2_2: ; RV64IF-NEXT: ret %a = fptoui float %x to i32 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll index 39a5beb317ab9..4d2b74ec735a1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll @@ -11,6 +11,10 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \ ; RUN: -verify-machineinstrs -target-abi=lp64d \ ; RUN: | FileCheck -check-prefix=RV64IF %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV32I %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV64I %s define float @sqrt_f32(float %a) nounwind { ; RV32IF-LABEL: 
sqrt_f32: @@ -22,6 +26,24 @@ define float @sqrt_f32(float %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fsqrt.s fa0, fa0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: sqrt_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrtf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sqrt_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrtf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.sqrt.f32(float %a) ret float %1 } @@ -36,6 +58,24 @@ define float @fma_f32(float %a, float %b, float %c) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fma_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fma_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %1 } @@ -50,6 +90,34 @@ define float @fmuladd_f32(float %a, float %b, float %c) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fmuladd_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 
8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmuladd_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ret float %1 } @@ -64,6 +132,20 @@ define float @fabs_f32(float %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fabs.s fa0, fa0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fabs_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret %1 = call float @llvm.fabs.f32(float %a) ret float %1 } @@ -78,6 +160,24 @@ define float @minnum_f32(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmin.s fa0, fa0, fa1 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: minnum_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fminf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: minnum_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fminf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.minnum.f32(float %a, float %b) ret float %1 } @@ -92,6 +192,24 @@ define float @maxnum_f32(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmax.s fa0, fa0, fa1 ; RV64IF-NEXT: ret +; 
+; RV32I-LABEL: maxnum_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaxf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: maxnum_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaxf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.maxnum.f32(float %a, float %b) ret float %1 } @@ -106,6 +224,24 @@ define float @copysign_f32(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fsgnj.s fa0, fa0, fa1 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: copysign_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: copysign_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a3, a2, -1 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret %1 = call float @llvm.copysign.f32(float %a, float %b) ret float %1 } @@ -128,6 +264,24 @@ define float @ceil_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: ceil_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ceilf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ceil_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ceilf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.ceil.f32(float %a) ret float %1 } @@ -150,6 
+304,24 @@ define float @trunc_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: trunc_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call truncf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: trunc_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call truncf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.trunc.f32(float %a) ret float %1 } @@ -172,6 +344,24 @@ define float @rint_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: rint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call rintf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call rintf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.rint.f32(float %a) ret float %1 } @@ -194,6 +384,24 @@ define float @nearbyint_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: nearbyint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call nearbyintf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: nearbyint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; 
RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call nearbyintf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.nearbyint.f32(float %a) ret float %1 } @@ -216,6 +424,24 @@ define float @round_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: round_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: round_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.round.f32(float %a) ret float %1 } @@ -238,6 +464,24 @@ define float @roundeven_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: roundeven_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundevenf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: roundeven_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundevenf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.roundeven.f32(float %a) ret float %1 } @@ -256,6 +500,68 @@ define i1 @fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 927 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; 
RV32I-NEXT: lui a3, 2048 +; RV32I-NEXT: lui a4, 1046528 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: addi a5, a1, -1 +; RV32I-NEXT: sltu a3, a5, a3 +; RV32I-NEXT: lui a5, 520192 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: add a4, a1, a4 +; RV32I-NEXT: sltu a4, a4, a5 +; RV32I-NEXT: xor a5, a1, a2 +; RV32I-NEXT: sltu a2, a2, a1 +; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: seqz a5, a5 +; RV32I-NEXT: and a3, a3, a0 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: slli a3, a0, 32 +; RV64I-NEXT: li a4, 1 +; RV64I-NEXT: lui a5, 2048 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: seqz a1, a0 +; RV64I-NEXT: xor a6, a0, a2 +; RV64I-NEXT: seqz a6, a6 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: lui a6, 520192 +; RV64I-NEXT: srli a3, a3, 32 +; RV64I-NEXT: xor a3, a3, a0 +; RV64I-NEXT: sub a4, a0, a4 +; RV64I-NEXT: sltu a2, a2, a0 +; RV64I-NEXT: sub a0, a0, a5 +; RV64I-NEXT: addiw a5, a5, -1 +; RV64I-NEXT: snez a3, a3 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a4, a4, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sltu a4, a4, a5 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: sltu a0, a0, a6 +; RV64I-NEXT: and a4, a4, a3 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret %cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639) ret i1 %cmp } @@ -274,6 +580,24 @@ define i1 @isnan_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 768 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isnan_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: and a0, 
a0, a1 +; RV32I-NEXT: sltu a0, a2, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnan_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: sltu a0, a2, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan ret i1 %1 } @@ -292,6 +616,26 @@ define i1 @isqnan_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 512 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isqnan_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 523264 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isqnan_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 523264 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; qnan ret i1 %1 } @@ -310,6 +654,30 @@ define i1 @issnan_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 256 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: issnan_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 523264 +; RV32I-NEXT: sltu a2, a2, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: issnan_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 523264 +; RV64I-NEXT: sltu a2, a2, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: and a0, a2, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; snan ret i1 %1 } @@ -328,6 +696,26 @@ define i1 @isinf_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 129 
; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isinf_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isinf_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: xor a0, a0, a2 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" ret i1 %1 } @@ -346,6 +734,22 @@ define i1 @isposinf_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 128 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isposinf_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 522240 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isposinf_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 522240 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" ret i1 %1 } @@ -364,6 +768,23 @@ define i1 @isneginf_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 1 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isneginf_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 1046528 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isneginf_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: li a1, 511 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a1, a1, 23 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" ret i1 %1 } @@ -382,6 +803,24 @@ define i1 @isfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 126 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; 
+; RV32I-LABEL: isfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: sltu a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: sltu a0, a0, a2 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" ret i1 %1 } @@ -400,6 +839,20 @@ define i1 @isposfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 112 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isposfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 522240 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isposfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 522240 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" ret i1 %1 } @@ -418,6 +871,32 @@ define i1 @isnegfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 14 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isnegfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: sltu a1, a1, a2 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnegfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: sltu a1, a1, a2 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 
= "-finite" ret i1 %1 } @@ -436,6 +915,30 @@ define i1 @isnotfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 897 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isnotfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: xor a1, a0, a2 +; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: sltu a0, a2, a0 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnotfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: xor a1, a0, a2 +; RV64I-NEXT: seqz a1, a1 +; RV64I-NEXT: sltu a0, a2, a0 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ox207 = "inf|nan" ret i1 %1 } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 73311ae287e7d..74749d8f1944b 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -507,7 +507,6 @@ # DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices @@ -541,8 +540,8 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. 
the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 94c03c400ffa3..16c588fa2f2ce 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -196,11 +196,9 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findLastSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li s0, -1 +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a1, a0, 32 ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: lui a3, 349525 @@ -227,36 +225,37 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: lui a2, 4112 -; RV64I-NEXT: srli s1, a1, 32 +; RV64I-NEXT: srli s0, a1, 32 ; RV64I-NEXT: addiw a1, a3, -241 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: addiw a1, a2, 257 ; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: beqz s1, .LBB3_2 +; RV64I-NEXT: beqz s0, .LBB3_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: li a1, 32 ; RV64I-NEXT: subw a1, a1, a0 -; RV64I-NEXT: xori s0, a1, 31 +; RV64I-NEXT: xori a0, a1, 31 +; RV64I-NEXT: j .LBB3_3 ; RV64I-NEXT: .LBB3_2: -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: 
ld s0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: .LBB3_3: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: findLastSet_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: slli a1, a0, 32 -; RV64ZBB-NEXT: srli a2, a1, 32 -; RV64ZBB-NEXT: li a1, -1 -; RV64ZBB-NEXT: beqz a2, .LBB3_2 +; RV64ZBB-NEXT: srli a1, a1, 32 +; RV64ZBB-NEXT: beqz a1, .LBB3_2 ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: clzw a0, a0 -; RV64ZBB-NEXT: xori a1, a0, 31 +; RV64ZBB-NEXT: xori a0, a0, 31 +; RV64ZBB-NEXT: ret ; RV64ZBB-NEXT: .LBB3_2: -; RV64ZBB-NEXT: mv a0, a1 +; RV64ZBB-NEXT: li a0, -1 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true) %2 = xor i32 31, %1 @@ -493,14 +492,12 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind { define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findFirstSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: li s0, -1 +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: addi a1, s1, -1 +; RV64I-NEXT: addi a1, s0, -1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: addiw a1, a2, 1365 @@ -521,29 +518,30 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: addiw a1, a1, 257 ; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: slli s1, s1, 32 -; RV64I-NEXT: srli s1, s1, 32 -; RV64I-NEXT: beqz s1, .LBB8_2 +; RV64I-NEXT: slli s0, s0, 32 +; RV64I-NEXT: srli s0, s0, 32 +; RV64I-NEXT: beqz 
s0, .LBB8_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srliw s0, a0, 24 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: j .LBB8_3 ; RV64I-NEXT: .LBB8_2: -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: .LBB8_3: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: findFirstSet_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: slli a1, a0, 32 -; RV64ZBB-NEXT: srli a2, a1, 32 -; RV64ZBB-NEXT: li a1, -1 -; RV64ZBB-NEXT: beqz a2, .LBB8_2 +; RV64ZBB-NEXT: srli a1, a1, 32 +; RV64ZBB-NEXT: beqz a1, .LBB8_2 ; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: ctzw a1, a0 +; RV64ZBB-NEXT: ctzw a0, a0 +; RV64ZBB-NEXT: ret ; RV64ZBB-NEXT: .LBB8_2: -; RV64ZBB-NEXT: mv a0, a1 +; RV64ZBB-NEXT: li a0, -1 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) %2 = icmp eq i32 %a, 0 diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll index 888ea666d7131..f3f71a923bdc2 100644 --- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll +++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll @@ -78,12 +78,14 @@ bar: define i64 @sext_phi_constants(i32 signext %c) { ; RV64I-LABEL: sext_phi_constants: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, -1 -; RV64I-NEXT: bnez a0, .LBB2_2 -; RV64I-NEXT: # %bb.1: # %iffalse -; RV64I-NEXT: li a1, -2 -; RV64I-NEXT: .LBB2_2: # %merge -; RV64I-NEXT: slli a0, a1, 32 +; RV64I-NEXT: beqz a0, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: j .LBB2_3 +; RV64I-NEXT: .LBB2_2: # %iffalse +; RV64I-NEXT: li a0, -2 +; RV64I-NEXT: .LBB2_3: # %merge +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret %a = icmp ne i32 %c, 0 diff --git a/llvm/test/CodeGen/RISCV/compress-opt-select.ll 
b/llvm/test/CodeGen/RISCV/compress-opt-select.ll index 2667fde89e935..f9333a45016a0 100644 --- a/llvm/test/CodeGen/RISCV/compress-opt-select.ll +++ b/llvm/test/CodeGen/RISCV/compress-opt-select.ll @@ -10,24 +10,24 @@ define i32 @ne_small_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_small_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, 20 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: bne a1, a2, .LBB0_2 +; RV32IFDC-NEXT: c.li a1, 20 +; RV32IFDC-NEXT: bne a0, a1, .LBB0_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB0_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_small_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 20 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB0_2 +; RV32IFD-NEXT: addi a1, zero, 20 +; RV32IFD-NEXT: bne a0, a1, .LBB0_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB0_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, 20 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -38,24 +38,24 @@ define i32 @ne_small_pos(i32 %in0) minsize { define i32 @ne_small_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_small_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, -20 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: bne a1, a2, .LBB1_2 +; RV32IFDC-NEXT: c.li a1, -20 +; RV32IFDC-NEXT: bne a0, a1, .LBB1_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB1_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_small_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -20 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB1_2 +; RV32IFD-NEXT: addi a1, zero, -20 +; 
RV32IFD-NEXT: bne a0, a1, .LBB1_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB1_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, -20 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -66,24 +66,24 @@ define i32 @ne_small_neg(i32 %in0) minsize { define i32 @ne_small_edge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_small_edge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, 31 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: bne a1, a2, .LBB2_2 +; RV32IFDC-NEXT: c.li a1, 31 +; RV32IFDC-NEXT: bne a0, a1, .LBB2_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB2_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_small_edge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 31 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB2_2 +; RV32IFD-NEXT: addi a1, zero, 31 +; RV32IFD-NEXT: bne a0, a1, .LBB2_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB2_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, 31 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -94,24 +94,24 @@ define i32 @ne_small_edge_pos(i32 %in0) minsize { define i32 @ne_small_edge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_small_edge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, -32 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: bne a1, a2, .LBB3_2 +; RV32IFDC-NEXT: c.li a1, -32 +; RV32IFDC-NEXT: bne a0, a1, .LBB3_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB3_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_small_edge_neg: ; RV32IFD: # %bb.0: -; 
RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -32 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB3_2 +; RV32IFD-NEXT: addi a1, zero, -32 +; RV32IFD-NEXT: bne a0, a1, .LBB3_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB3_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, -32 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -123,23 +123,24 @@ define i32 @ne_small_edge_neg(i32 %in0) minsize { define i32 @ne_medium_ledge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_medium_ledge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, -33 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.bnez a1, .LBB4_2 +; RV32IFDC-NEXT: addi a0, a0, -33 +; RV32IFDC-NEXT: c.bnez a0, .LBB4_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB4_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_medium_ledge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 33 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB4_2 +; RV32IFD-NEXT: addi a1, zero, 33 +; RV32IFD-NEXT: bne a0, a1, .LBB4_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB4_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, 33 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -151,23 +152,24 @@ define i32 @ne_medium_ledge_pos(i32 %in0) minsize { define i32 @ne_medium_ledge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_medium_ledge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, 33 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.bnez a1, .LBB5_2 +; RV32IFDC-NEXT: addi a0, a0, 33 +; RV32IFDC-NEXT: c.bnez a0, .LBB5_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; 
RV32IFDC-NEXT: .LBB5_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_medium_ledge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -33 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB5_2 +; RV32IFD-NEXT: addi a1, zero, -33 +; RV32IFD-NEXT: bne a0, a1, .LBB5_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB5_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, -33 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -179,23 +181,24 @@ define i32 @ne_medium_ledge_neg(i32 %in0) minsize { define i32 @ne_medium_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_medium_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, -63 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.bnez a1, .LBB6_2 +; RV32IFDC-NEXT: addi a0, a0, -63 +; RV32IFDC-NEXT: c.bnez a0, .LBB6_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB6_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_medium_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 63 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB6_2 +; RV32IFD-NEXT: addi a1, zero, 63 +; RV32IFD-NEXT: bne a0, a1, .LBB6_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB6_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, 63 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -207,23 +210,24 @@ define i32 @ne_medium_pos(i32 %in0) minsize { define i32 @ne_medium_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_medium_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, 63 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.bnez a1, .LBB7_2 +; RV32IFDC-NEXT: addi a0, a0, 63 +; RV32IFDC-NEXT: 
c.bnez a0, .LBB7_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB7_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_medium_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -63 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB7_2 +; RV32IFD-NEXT: addi a1, zero, -63 +; RV32IFD-NEXT: bne a0, a1, .LBB7_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB7_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, -63 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -235,23 +239,24 @@ define i32 @ne_medium_neg(i32 %in0) minsize { define i32 @ne_medium_bedge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_medium_bedge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, -2047 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.bnez a1, .LBB8_2 +; RV32IFDC-NEXT: addi a0, a0, -2047 +; RV32IFDC-NEXT: c.bnez a0, .LBB8_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB8_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_medium_bedge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 2047 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB8_2 +; RV32IFD-NEXT: addi a1, zero, 2047 +; RV32IFD-NEXT: bne a0, a1, .LBB8_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB8_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, 2047 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -263,23 +268,24 @@ define i32 @ne_medium_bedge_pos(i32 %in0) minsize { define i32 @ne_medium_bedge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_medium_bedge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: 
addi a1, a0, 2047 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.bnez a1, .LBB9_2 +; RV32IFDC-NEXT: addi a0, a0, 2047 +; RV32IFDC-NEXT: c.bnez a0, .LBB9_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB9_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_medium_bedge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -2047 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB9_2 +; RV32IFD-NEXT: addi a1, zero, -2047 +; RV32IFD-NEXT: bne a0, a1, .LBB9_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB9_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, -2047 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -290,26 +296,26 @@ define i32 @ne_medium_bedge_neg(i32 %in0) minsize { define i32 @ne_big_ledge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_big_ledge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a0, 1 -; RV32IFDC-NEXT: slli a2, a0, 11 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: bne a1, a2, .LBB10_2 +; RV32IFDC-NEXT: c.li a1, 1 +; RV32IFDC-NEXT: c.slli a1, 11 +; RV32IFDC-NEXT: bne a0, a1, .LBB10_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB10_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_big_ledge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a0, zero, 1 -; RV32IFD-NEXT: slli a2, a0, 11 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB10_2 +; RV32IFD-NEXT: addi a1, zero, 1 +; RV32IFD-NEXT: slli a1, a1, 11 +; RV32IFD-NEXT: bne a0, a1, .LBB10_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB10_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; 
RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, 2048 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -320,24 +326,24 @@ define i32 @ne_big_ledge_pos(i32 %in0) minsize { define i32 @ne_big_ledge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: ne_big_ledge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: addi a2, zero, -2048 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: bne a1, a2, .LBB11_2 +; RV32IFDC-NEXT: addi a1, zero, -2048 +; RV32IFDC-NEXT: bne a0, a1, .LBB11_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB11_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: ne_big_ledge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -2048 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: bne a1, a2, .LBB11_2 +; RV32IFD-NEXT: addi a1, zero, -2048 +; RV32IFD-NEXT: bne a0, a1, .LBB11_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB11_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp ne i32 %in0, -2048 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -351,24 +357,24 @@ define i32 @ne_big_ledge_neg(i32 %in0) minsize { define i32 @eq_small_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_small_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, 20 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: beq a1, a2, .LBB12_2 +; RV32IFDC-NEXT: c.li a1, 20 +; RV32IFDC-NEXT: beq a0, a1, .LBB12_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB12_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_small_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 20 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB12_2 +; RV32IFD-NEXT: addi a1, zero, 20 +; 
RV32IFD-NEXT: beq a0, a1, .LBB12_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB12_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, 20 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -379,24 +385,24 @@ define i32 @eq_small_pos(i32 %in0) minsize { define i32 @eq_small_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_small_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, -20 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: beq a1, a2, .LBB13_2 +; RV32IFDC-NEXT: c.li a1, -20 +; RV32IFDC-NEXT: beq a0, a1, .LBB13_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB13_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_small_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -20 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB13_2 +; RV32IFD-NEXT: addi a1, zero, -20 +; RV32IFD-NEXT: beq a0, a1, .LBB13_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB13_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, -20 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -407,24 +413,24 @@ define i32 @eq_small_neg(i32 %in0) minsize { define i32 @eq_small_edge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_small_edge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, 31 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: beq a1, a2, .LBB14_2 +; RV32IFDC-NEXT: c.li a1, 31 +; RV32IFDC-NEXT: beq a0, a1, .LBB14_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB14_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_small_edge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: 
addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 31 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB14_2 +; RV32IFD-NEXT: addi a1, zero, 31 +; RV32IFD-NEXT: beq a0, a1, .LBB14_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB14_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, 31 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -435,24 +441,24 @@ define i32 @eq_small_edge_pos(i32 %in0) minsize { define i32 @eq_small_edge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_small_edge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a2, -32 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: beq a1, a2, .LBB15_2 +; RV32IFDC-NEXT: c.li a1, -32 +; RV32IFDC-NEXT: beq a0, a1, .LBB15_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB15_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_small_edge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -32 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB15_2 +; RV32IFD-NEXT: addi a1, zero, -32 +; RV32IFD-NEXT: beq a0, a1, .LBB15_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB15_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, -32 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -464,23 +470,24 @@ define i32 @eq_small_edge_neg(i32 %in0) minsize { define i32 @eq_medium_ledge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_medium_ledge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, -33 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.beqz a1, .LBB16_2 +; RV32IFDC-NEXT: addi a0, a0, -33 +; RV32IFDC-NEXT: c.beqz a0, .LBB16_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr 
ra ; RV32IFDC-NEXT: .LBB16_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_medium_ledge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 33 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB16_2 +; RV32IFD-NEXT: addi a1, zero, 33 +; RV32IFD-NEXT: beq a0, a1, .LBB16_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB16_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, 33 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -492,23 +499,24 @@ define i32 @eq_medium_ledge_pos(i32 %in0) minsize { define i32 @eq_medium_ledge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_medium_ledge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, 33 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.beqz a1, .LBB17_2 +; RV32IFDC-NEXT: addi a0, a0, 33 +; RV32IFDC-NEXT: c.beqz a0, .LBB17_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB17_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_medium_ledge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -33 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB17_2 +; RV32IFD-NEXT: addi a1, zero, -33 +; RV32IFD-NEXT: beq a0, a1, .LBB17_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB17_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, -33 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -520,23 +528,24 @@ define i32 @eq_medium_ledge_neg(i32 %in0) minsize { define i32 @eq_medium_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_medium_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, -63 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.beqz a1, .LBB18_2 +; 
RV32IFDC-NEXT: addi a0, a0, -63 +; RV32IFDC-NEXT: c.beqz a0, .LBB18_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB18_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_medium_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 63 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB18_2 +; RV32IFD-NEXT: addi a1, zero, 63 +; RV32IFD-NEXT: beq a0, a1, .LBB18_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB18_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, 63 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -548,23 +557,24 @@ define i32 @eq_medium_pos(i32 %in0) minsize { define i32 @eq_medium_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_medium_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, 63 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.beqz a1, .LBB19_2 +; RV32IFDC-NEXT: addi a0, a0, 63 +; RV32IFDC-NEXT: c.beqz a0, .LBB19_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB19_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_medium_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -63 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB19_2 +; RV32IFD-NEXT: addi a1, zero, -63 +; RV32IFD-NEXT: beq a0, a1, .LBB19_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB19_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, -63 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -576,23 +586,24 @@ define i32 @eq_medium_neg(i32 %in0) minsize { define i32 @eq_medium_bedge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_medium_bedge_pos: ; RV32IFDC: # 
%bb.0: -; RV32IFDC-NEXT: addi a1, a0, -2047 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.beqz a1, .LBB20_2 +; RV32IFDC-NEXT: addi a0, a0, -2047 +; RV32IFDC-NEXT: c.beqz a0, .LBB20_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB20_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_medium_bedge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, 2047 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB20_2 +; RV32IFD-NEXT: addi a1, zero, 2047 +; RV32IFD-NEXT: beq a0, a1, .LBB20_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB20_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, 2047 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -604,23 +615,24 @@ define i32 @eq_medium_bedge_pos(i32 %in0) minsize { define i32 @eq_medium_bedge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_medium_bedge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: addi a1, a0, 2047 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: c.beqz a1, .LBB21_2 +; RV32IFDC-NEXT: addi a0, a0, 2047 +; RV32IFDC-NEXT: c.beqz a0, .LBB21_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB21_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_medium_bedge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -2047 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB21_2 +; RV32IFD-NEXT: addi a1, zero, -2047 +; RV32IFD-NEXT: beq a0, a1, .LBB21_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB21_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, -2047 %toRet = select i1 %cmp, i32 -99, i32 42 @@ 
-631,26 +643,26 @@ define i32 @eq_medium_bedge_neg(i32 %in0) minsize { define i32 @eq_big_ledge_pos(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_big_ledge_pos: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: c.li a0, 1 -; RV32IFDC-NEXT: slli a2, a0, 11 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: beq a1, a2, .LBB22_2 +; RV32IFDC-NEXT: c.li a1, 1 +; RV32IFDC-NEXT: c.slli a1, 11 +; RV32IFDC-NEXT: beq a0, a1, .LBB22_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB22_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_big_ledge_pos: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a0, zero, 1 -; RV32IFD-NEXT: slli a2, a0, 11 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB22_2 +; RV32IFD-NEXT: addi a1, zero, 1 +; RV32IFD-NEXT: slli a1, a1, 11 +; RV32IFD-NEXT: beq a0, a1, .LBB22_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB22_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, 2048 %toRet = select i1 %cmp, i32 -99, i32 42 @@ -661,24 +673,24 @@ define i32 @eq_big_ledge_pos(i32 %in0) minsize { define i32 @eq_big_ledge_neg(i32 %in0) minsize { ; RV32IFDC-LABEL: eq_big_ledge_neg: ; RV32IFDC: # %bb.0: -; RV32IFDC-NEXT: c.mv a1, a0 -; RV32IFDC-NEXT: addi a2, zero, -2048 -; RV32IFDC-NEXT: addi a0, zero, -99 -; RV32IFDC-NEXT: beq a1, a2, .LBB23_2 +; RV32IFDC-NEXT: addi a1, zero, -2048 +; RV32IFDC-NEXT: beq a0, a1, .LBB23_2 ; RV32IFDC-NEXT: # %bb.1: ; RV32IFDC-NEXT: addi a0, zero, 42 +; RV32IFDC-NEXT: c.jr ra ; RV32IFDC-NEXT: .LBB23_2: +; RV32IFDC-NEXT: addi a0, zero, -99 ; RV32IFDC-NEXT: c.jr ra ; ; RV32IFD-LABEL: eq_big_ledge_neg: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi a1, a0, 0 -; RV32IFD-NEXT: addi a2, zero, -2048 -; RV32IFD-NEXT: addi a0, zero, -99 -; RV32IFD-NEXT: beq a1, a2, .LBB23_2 +; 
RV32IFD-NEXT: addi a1, zero, -2048 +; RV32IFD-NEXT: beq a0, a1, .LBB23_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: addi a0, zero, 42 +; RV32IFD-NEXT: jalr zero, 0(ra) ; RV32IFD-NEXT: .LBB23_2: +; RV32IFD-NEXT: addi a0, zero, -99 ; RV32IFD-NEXT: jalr zero, 0(ra) %cmp = icmp eq i32 %in0, -2048 %toRet = select i1 %cmp, i32 -99, i32 42 diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll index 3adc46143f9f2..f463bb2009f95 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -1695,3 +1695,61 @@ define double @atan2_f64(double %a, double %b) nounwind strictfp { %1 = call double @llvm.experimental.constrained.atan2.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 } + +define double @ldexp_f64(double %x, i32 signext %y) nounwind { +; RV32IFD-LABEL: ldexp_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call ldexp +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: ldexp_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call ldexp +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: ldexp_f64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call ldexp +; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: ldexp_f64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64IZFINXZDINX-NEXT: call ldexp +; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: ret +; +; RV32I-LABEL: ldexp_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ldexp +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ldexp_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ldexp +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call double @llvm.experimental.constrained.ldexp.f64.i32(double %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %z +} diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll index 3ef128ed6d4cd..ebeca7c0c362a 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -1637,3 +1637,135 @@ define double @minimumnum_double(double %x, double %y) { %z = call double @llvm.minimumnum.f64(double %x, double %y) ret double %z } + +define double @ldexp_double(double %x, i32 signext %y) nounwind { +; RV32IFD-LABEL: ldexp_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: tail ldexp +; +; RV64IFD-LABEL: ldexp_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call ldexp +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: ldexp_double: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call ldexp +; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; 
RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: ldexp_double: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINXZDINX-NEXT: call ldexp +; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: ret +; +; RV32I-LABEL: ldexp_double: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ldexp +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ldexp_double: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ldexp +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call double @llvm.ldexp.f64.i32(double %x, i32 %y) + ret double %z +} + +define {double, i32} @frexp_double(double %x) nounwind { +; RV32IFD-LABEL: frexp_double: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: addi a0, sp, 8 +; RV32IFD-NEXT: call frexp +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: frexp_double: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: mv a0, sp +; RV64IFD-NEXT: call frexp +; RV64IFD-NEXT: ld a0, 0(sp) +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: frexp_double: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: addi a2, sp, 8 +; RV32IZFINXZDINX-NEXT: call frexp +; RV32IZFINXZDINX-NEXT: lw a2, 8(sp) +; 
RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: frexp_double: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINXZDINX-NEXT: mv a1, sp +; RV64IZFINXZDINX-NEXT: call frexp +; RV64IZFINXZDINX-NEXT: ld a1, 0(sp) +; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: ret +; +; RV32I-LABEL: frexp_double: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a3, a2 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a2, sp, 4 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: call frexp +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a2, 8(s0) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: frexp_double: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: call frexp +; RV64I-NEXT: lw a1, 4(sp) +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call {double, i32} @llvm.frexp.f64.i32(double %x) + ret {double, i32} %a +} diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll index f04da712dce31..4c383be1ac42c 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll @@ -1660,3 +1660,61 @@ define i64 @llround_f32(float %a) nounwind strictfp { %1 = call i64 @llvm.experimental.constrained.llround.i64.f32(float %a, metadata 
!"fpexcept.strict") strictfp ret i64 %1 } + +define float @ldexp_f32(float %x, i32 signext %y) nounwind { +; RV32IF-LABEL: ldexp_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call ldexpf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: ldexp_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call ldexpf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32IZFINX-LABEL: ldexp_f32: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: call ldexpf +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: ldexp_f32: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: addi sp, sp, -16 +; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINX-NEXT: call ldexpf +; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINX-NEXT: addi sp, sp, 16 +; RV64IZFINX-NEXT: ret +; +; RV32I-LABEL: ldexp_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ldexpf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ldexp_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ldexpf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call float @llvm.experimental.constrained.ldexp.f32.i32(float %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %z +} diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll 
b/llvm/test/CodeGen/RISCV/float-intrinsics.ll index 37381aeeb2a0f..d42afd504e5dc 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -2242,3 +2242,121 @@ define float @minimumnum_float(float %x, float %y) { %z = call float @llvm.minimumnum.f32(float %x, float %y) ret float %z } + +define float @ldexp_float(float %x, i32 signext %y) nounwind { +; RV32IF-LABEL: ldexp_float: +; RV32IF: # %bb.0: +; RV32IF-NEXT: tail ldexpf +; +; RV32IZFINX-LABEL: ldexp_float: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: tail ldexpf +; +; RV64IF-LABEL: ldexp_float: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call ldexpf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV64IZFINX-LABEL: ldexp_float: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: addi sp, sp, -16 +; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINX-NEXT: call ldexpf +; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINX-NEXT: addi sp, sp, 16 +; RV64IZFINX-NEXT: ret +; +; RV32I-LABEL: ldexp_float: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ldexpf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ldexp_float: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ldexpf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call float @llvm.ldexp.f32.i32(float %x, i32 %y) + ret float %z +} + +define {float, i32} @frexp_float(float %x) nounwind { +; RV32IF-LABEL: frexp_float: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: addi a0, sp, 8 +; RV32IF-NEXT: call frexpf +; 
RV32IF-NEXT: lw a0, 8(sp) +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV32IZFINX-LABEL: frexp_float: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: addi a1, sp, 8 +; RV32IZFINX-NEXT: call frexpf +; RV32IZFINX-NEXT: lw a1, 8(sp) +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IF-LABEL: frexp_float: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: mv a0, sp +; RV64IF-NEXT: call frexpf +; RV64IF-NEXT: ld a0, 0(sp) +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV64IZFINX-LABEL: frexp_float: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: addi sp, sp, -16 +; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINX-NEXT: mv a1, sp +; RV64IZFINX-NEXT: call frexpf +; RV64IZFINX-NEXT: ld a1, 0(sp) +; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINX-NEXT: addi sp, sp, 16 +; RV64IZFINX-NEXT: ret +; +; RV32I-LABEL: frexp_float: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: call frexpf +; RV32I-NEXT: lw a1, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: frexp_float: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: call frexpf +; RV64I-NEXT: lw a1, 4(sp) +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call {float, i32} @llvm.frexp.f32.i32(float %x) + ret {float, i32} %a +} diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll 
b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 0d26e660c979b..8f19424742775 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -3186,3 +3186,267 @@ define half @minimumnum_half(half %x, half %y) { %z = call half @llvm.minimumnum.f16(half %x, half %y) ret half %z } + +define half @ldexp_half(half %x, i32 signext %y) nounwind { +; RV32IZFH-LABEL: ldexp_half: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call ldexpf +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: ldexp_half: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call ldexpf +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: ldexp_half: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h a0, a0 +; RV32IZHINX-NEXT: call ldexpf +; RV32IZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: ldexp_half: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: addi sp, sp, -16 +; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINX-NEXT: fcvt.s.h a0, a0 +; RV64IZHINX-NEXT: call ldexpf +; RV64IZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINX-NEXT: addi sp, sp, 16 +; RV64IZHINX-NEXT: ret +; +; RV32I-LABEL: ldexp_half: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw 
s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: call __extendhfsf2 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call ldexpf +; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ldexp_half: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: call __extendhfsf2 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call ldexpf +; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: ldexp_half: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call ldexpf +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: ldexp_half: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call ldexpf +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: ldexp_half: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: call ldexpf +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; 
RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: ldexp_half: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: addi sp, sp, -16 +; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: call ldexpf +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINXMIN-NEXT: addi sp, sp, 16 +; RV64IZHINXMIN-NEXT: ret + %z = call half @llvm.ldexp.f16.i32(half %x, i32 %y) + ret half %z +} + +define {half, i32} @frexp_half(half %x) nounwind { +; RV32IZFH-LABEL: frexp_half: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: addi a0, sp, 8 +; RV32IZFH-NEXT: call frexpf +; RV32IZFH-NEXT: lw a0, 8(sp) +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: frexp_half: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: mv a0, sp +; RV64IZFH-NEXT: call frexpf +; RV64IZFH-NEXT: ld a0, 0(sp) +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: frexp_half: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h a0, a0 +; RV32IZHINX-NEXT: addi a1, sp, 8 +; RV32IZHINX-NEXT: call frexpf +; RV32IZHINX-NEXT: lw a1, 8(sp) +; RV32IZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: frexp_half: +; RV64IZHINX: # %bb.0: +; 
RV64IZHINX-NEXT: addi sp, sp, -16 +; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINX-NEXT: fcvt.s.h a0, a0 +; RV64IZHINX-NEXT: mv a1, sp +; RV64IZHINX-NEXT: call frexpf +; RV64IZHINX-NEXT: ld a1, 0(sp) +; RV64IZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINX-NEXT: addi sp, sp, 16 +; RV64IZHINX-NEXT: ret +; +; RV32I-LABEL: frexp_half: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: call __extendhfsf2 +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: call frexpf +; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: lw a1, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: frexp_half: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: call __extendhfsf2 +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: call frexpf +; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: lw a1, 4(sp) +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: frexp_half: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: addi a0, sp, 8 +; RV32IZFHMIN-NEXT: call frexpf +; RV32IZFHMIN-NEXT: lw a0, 8(sp) +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: frexp_half: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: mv a0, sp +; RV64IZFHMIN-NEXT: call frexpf +; 
RV64IZFHMIN-NEXT: ld a0, 0(sp) +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: frexp_half: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: addi a1, sp, 8 +; RV32IZHINXMIN-NEXT: call frexpf +; RV32IZHINXMIN-NEXT: lw a1, 8(sp) +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: frexp_half: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: addi sp, sp, -16 +; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: mv a1, sp +; RV64IZHINXMIN-NEXT: call frexpf +; RV64IZHINXMIN-NEXT: ld a1, 0(sp) +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINXMIN-NEXT: addi sp, sp, 16 +; RV64IZHINXMIN-NEXT: ret + %a = call {half, i32} @llvm.frexp.f16.i32(half %x) + ret {half, i32} %a +} diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll index 775ea8e820afe..eb84774014a4b 100644 --- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll +++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll @@ -184,13 +184,13 @@ declare i32 @toupper() define signext i32 @overlap_live_ranges(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: overlap_live_ranges: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: li a3, 1 -; CHECK-NEXT: li a2, 13 -; CHECK-NEXT: bne a1, a3, .LBB1_2 +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: bne a1, a2, .LBB1_2 ; CHECK-NEXT: # %bb.1: # %bb2 -; CHECK-NEXT: lw a2, 4(a0) -; CHECK-NEXT: .LBB1_2: # %bb5 -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: lw a0, 4(a0) +; CHECK-NEXT: ret +; CHECK-NEXT: 
.LBB1_2: +; CHECK-NEXT: li a0, 13 ; CHECK-NEXT: ret bb: %i = icmp eq i32 %arg1, 1 @@ -205,3 +205,181 @@ bb5: ; preds = %bb2, %bb %i6 = phi i32 [ %i4, %bb2 ], [ 13, %bb ] ret i32 %i6 } + + +; For switches, the values feeding the phi are always sunk into the +; target blocks as the IR syntax requires the intermediate block and +; DAG lowers it in the immediate predecessor of the phi. +define signext i32 @switch_dispatch(i8 %a) { +; CHECK-LABEL: switch_dispatch: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: li a1, 31 +; CHECK-NEXT: blt a1, a0, .LBB2_5 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: beqz a0, .LBB2_10 +; CHECK-NEXT: # %bb.2: # %bb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: beq a0, a1, .LBB2_11 +; CHECK-NEXT: # %bb.3: # %bb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: bne a0, a1, .LBB2_9 +; CHECK-NEXT: # %bb.4: # %case.4 +; CHECK-NEXT: li s0, 644 +; CHECK-NEXT: j .LBB2_13 +; CHECK-NEXT: .LBB2_5: # %bb +; CHECK-NEXT: li a1, 234 +; CHECK-NEXT: beq a0, a1, .LBB2_9 +; CHECK-NEXT: # %bb.6: # %bb +; CHECK-NEXT: li a1, 70 +; CHECK-NEXT: beq a0, a1, .LBB2_12 +; CHECK-NEXT: # %bb.7: # %bb +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: bne a0, a1, .LBB2_9 +; CHECK-NEXT: # %bb.8: # %case.0 +; CHECK-NEXT: li s0, 13 +; CHECK-NEXT: j .LBB2_13 +; CHECK-NEXT: .LBB2_9: # %case.default +; CHECK-NEXT: li s0, 23 +; CHECK-NEXT: j .LBB2_13 +; CHECK-NEXT: .LBB2_10: # %case.5 +; CHECK-NEXT: li s0, 54 +; CHECK-NEXT: j .LBB2_13 +; CHECK-NEXT: .LBB2_11: # %case.1 +; CHECK-NEXT: li s0, 53 +; CHECK-NEXT: j .LBB2_13 +; CHECK-NEXT: .LBB2_12: # %case.2 +; CHECK-NEXT: li s0, 33 +; CHECK-NEXT: .LBB2_13: # %merge +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: call use +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; 
CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: .cfi_restore ra +; CHECK-NEXT: .cfi_restore s0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret +bb: + switch i8 %a, label %case.default [ + i8 32, label %case.0 + i8 12, label %case.1 + i8 70, label %case.2 + i8 -22, label %case.3 + i8 13, label %case.4 + i8 0, label %case.5 + ] + +case.0: + br label %merge +case.1: + br label %merge +case.2: + br label %merge +case.3: + br label %merge +case.4: + br label %merge +case.5: + br label %merge +case.default: + br label %merge + +merge: + %res = phi i32 [ 23, %case.default ], [ 13, %case.0 ], [ 53, %case.1 ], [ 33, %case.2 ], [ 23, %case.3 ], [ 644, %case.4 ], [ 54, %case.5 ] + call void @use(i32 %res) + ret i32 %res +} + +; Same as for the switch, but written via manual branching. +define signext i32 @branch_dispatch(i8 %a) { +; CHECK-LABEL: branch_dispatch: +; CHECK: # %bb.0: # %case.0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: beq a0, a1, .LBB3_7 +; CHECK-NEXT: # %bb.1: # %case.1 +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: beq a0, a1, .LBB3_8 +; CHECK-NEXT: # %bb.2: # %case.2 +; CHECK-NEXT: li a1, 70 +; CHECK-NEXT: beq a0, a1, .LBB3_9 +; CHECK-NEXT: # %bb.3: # %case.3 +; CHECK-NEXT: li a1, 234 +; CHECK-NEXT: li s0, 23 +; CHECK-NEXT: beq a0, a1, .LBB3_10 +; CHECK-NEXT: # %bb.4: # %case.4 +; CHECK-NEXT: beqz a0, .LBB3_11 +; CHECK-NEXT: # %bb.5: # %case.5 +; CHECK-NEXT: li a1, 5 +; CHECK-NEXT: bne a0, a1, .LBB3_10 +; CHECK-NEXT: # %bb.6: +; CHECK-NEXT: li s0, 54 +; CHECK-NEXT: j .LBB3_10 +; CHECK-NEXT: .LBB3_7: +; CHECK-NEXT: li s0, 13 +; CHECK-NEXT: j .LBB3_10 +; CHECK-NEXT: .LBB3_8: +; CHECK-NEXT: li s0, 53 +; CHECK-NEXT: j 
.LBB3_10 +; CHECK-NEXT: .LBB3_9: +; CHECK-NEXT: li s0, 33 +; CHECK-NEXT: .LBB3_10: # %merge +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: call use +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: .cfi_restore ra +; CHECK-NEXT: .cfi_restore s0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_11: +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: li s0, 644 +; CHECK-NEXT: j .LBB3_10 +case.0: + %c0 = icmp ne i8 %a, 32 + br i1 %c0, label %case.1, label %merge +case.1: + %c1 = icmp ne i8 %a, 12 + br i1 %c1, label %case.2, label %merge +case.2: + %c2 = icmp ne i8 %a, 70 + br i1 %c2, label %case.3, label %merge +case.3: + %c3 = icmp ne i8 %a, -22 + br i1 %c3, label %case.4, label %merge +case.4: + %c4 = icmp ne i8 %a, 0 + br i1 %c4, label %case.5, label %merge +case.5: + %c5 = icmp ne i8 %a, 5 + br i1 %c5, label %case.default, label %merge +case.default: + br label %merge + +merge: + %res = phi i32 [ 23, %case.default ], [ 13, %case.0 ], [ 53, %case.1 ], [ 33, %case.2 ], [ 23, %case.3 ], [ 644, %case.4 ], [ 54, %case.5 ] + call void @use(i32 %res) + ret i32 %res +} + + +declare void @use(i32) + diff --git a/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll index f69909e76d4c1..a2c572e07ff7d 100644 --- a/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll +++ b/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll @@ -5,15 +5,13 @@ define signext i32 @mulw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin ; CHECK-LABEL: mulw: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 1 -; CHECK-NEXT: bge a0, a1, .LBB0_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: li a2, 1 -; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: bge a0, a1, .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mulw a2, a0, a2 ; 
CHECK-NEXT: addiw a0, a0, 1 -; CHECK-NEXT: blt a0, a1, .LBB0_2 -; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup +; CHECK-NEXT: blt a0, a1, .LBB0_1 +; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-cfi-info.ll index e78bb323f4d3c..0ae2c2ef9c9d3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-cfi-info.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-cfi-info.ll @@ -3,57 +3,63 @@ ; RUN: | FileCheck -check-prefix=OMIT-FP %s ; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck -check-prefix=NO-OMIT-FP %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=OMIT-FP-ZCMP %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zcmp -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck -check-prefix=NO-OMIT-FP-ZCMP %s define riscv_vector_cc @test_vector_callee_cfi( %va) { ; OMIT-FP-LABEL: test_vector_callee_cfi: ; OMIT-FP: # %bb.0: # %entry -; OMIT-FP-NEXT: addi sp, sp, -16 -; OMIT-FP-NEXT: .cfi_def_cfa_offset 16 +; OMIT-FP-NEXT: addi sp, sp, -48 +; OMIT-FP-NEXT: .cfi_def_cfa_offset 48 +; OMIT-FP-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; OMIT-FP-NEXT: .cfi_offset s1, -8 ; OMIT-FP-NEXT: csrr a0, vlenb ; OMIT-FP-NEXT: slli a1, a0, 3 ; OMIT-FP-NEXT: sub a0, a1, a0 ; OMIT-FP-NEXT: sub sp, sp, a0 -; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x07, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 7 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x07, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 7 * vlenb ; OMIT-FP-NEXT: csrr a0, vlenb ; OMIT-FP-NEXT: li a1, 6 ; OMIT-FP-NEXT: mul a0, a0, a1 ; OMIT-FP-NEXT: add a0, sp, a0 -; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: addi a0, a0, 32 ; OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill ; OMIT-FP-NEXT: csrr a0, vlenb ; 
OMIT-FP-NEXT: slli a0, a0, 2 ; OMIT-FP-NEXT: add a0, sp, a0 -; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: addi a0, a0, 32 ; OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill -; OMIT-FP-NEXT: addi a0, sp, 16 +; OMIT-FP-NEXT: addi a0, sp, 32 ; OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7f, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 1 * vlenb -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 3 * vlenb -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 2 * vlenb -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 7 * vlenb -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x08, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 6 * vlenb -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x08, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 5 * vlenb -; OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 4 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x7f, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 16 - 1 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 16 - 3 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x63, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 16 - 2 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 16 - 7 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x65, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 16 - 6 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x66, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 16 - 5 * vlenb +; 
OMIT-FP-NEXT: .cfi_escape 0x10, 0x67, 0x0b, 0x11, 0x70, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 16 - 4 * vlenb ; OMIT-FP-NEXT: #APP ; OMIT-FP-NEXT: #NO_APP ; OMIT-FP-NEXT: csrr a0, vlenb ; OMIT-FP-NEXT: li a1, 6 ; OMIT-FP-NEXT: mul a0, a0, a1 ; OMIT-FP-NEXT: add a0, sp, a0 -; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: addi a0, a0, 32 ; OMIT-FP-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload ; OMIT-FP-NEXT: csrr a0, vlenb ; OMIT-FP-NEXT: slli a0, a0, 2 ; OMIT-FP-NEXT: add a0, sp, a0 -; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: addi a0, a0, 32 ; OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload -; OMIT-FP-NEXT: addi a0, sp, 16 +; OMIT-FP-NEXT: addi a0, sp, 32 ; OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload ; OMIT-FP-NEXT: csrr a0, vlenb ; OMIT-FP-NEXT: slli a1, a0, 3 ; OMIT-FP-NEXT: sub a0, a1, a0 ; OMIT-FP-NEXT: add sp, sp, a0 -; OMIT-FP-NEXT: .cfi_def_cfa sp, 16 +; OMIT-FP-NEXT: .cfi_def_cfa sp, 48 ; OMIT-FP-NEXT: .cfi_restore v1 ; OMIT-FP-NEXT: .cfi_restore v2 ; OMIT-FP-NEXT: .cfi_restore v3 @@ -61,19 +67,23 @@ define riscv_vector_cc @test_vector_callee_cfi( @test_vector_callee_cfi( @test_vector_callee_cfi( %va } diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll index 96081fc462d6f..6a24d03de8749 100644 --- a/llvm/test/CodeGen/RISCV/select-const.ll +++ b/llvm/test/CodeGen/RISCV/select-const.ll @@ -61,22 +61,22 @@ define signext i32 @select_const_int_pow2_zero(i1 zeroext %a) nounwind { define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind { ; RV32-LABEL: select_const_int_harder: ; RV32: # %bb.0: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: li a0, 6 -; RV32-NEXT: bnez a1, .LBB3_2 +; RV32-NEXT: bnez a0, .LBB3_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a0, 38 +; RV32-NEXT: ret ; RV32-NEXT: .LBB3_2: +; RV32-NEXT: li a0, 6 ; RV32-NEXT: ret ; ; RV64-LABEL: select_const_int_harder: ; RV64: # %bb.0: -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: li a0, 6 -; RV64-NEXT: bnez 
a1, .LBB3_2 +; RV64-NEXT: bnez a0, .LBB3_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a0, 38 +; RV64-NEXT: ret ; RV64-NEXT: .LBB3_2: +; RV64-NEXT: li a0, 6 ; RV64-NEXT: ret %1 = select i1 %a, i32 6, i32 38 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll index 252cf776299b3..4405cc3f5e163 100644 --- a/llvm/test/CodeGen/RISCV/select.ll +++ b/llvm/test/CodeGen/RISCV/select.ll @@ -1585,22 +1585,22 @@ define i32 @select_cst_not5(i32 signext %a, i32 signext %b) { define i32 @select_cst_unknown(i32 signext %a, i32 signext %b) { ; RV32IM-LABEL: select_cst_unknown: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a2, a0 -; RV32IM-NEXT: li a0, 5 -; RV32IM-NEXT: blt a2, a1, .LBB42_2 +; RV32IM-NEXT: blt a0, a1, .LBB42_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, -7 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB42_2: +; RV32IM-NEXT: li a0, 5 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_unknown: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a2, a0 -; RV64IM-NEXT: li a0, 5 -; RV64IM-NEXT: blt a2, a1, .LBB42_2 +; RV64IM-NEXT: blt a0, a1, .LBB42_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, -7 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB42_2: +; RV64IM-NEXT: li a0, 5 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_unknown: @@ -1626,22 +1626,22 @@ define i32 @select_cst_unknown(i32 signext %a, i32 signext %b) { define i32 @select_cst1(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 10 -; RV32IM-NEXT: bnez a1, .LBB43_2 +; RV32IM-NEXT: bnez a0, .LBB43_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 20 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB43_2: +; RV32IM-NEXT: li a0, 10 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst1: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 10 -; RV64IM-NEXT: bnez a1, .LBB43_2 +; RV64IM-NEXT: bnez a0, .LBB43_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 20 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB43_2: +; RV64IM-NEXT: li a0, 10 ; RV64IM-NEXT: 
ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst1: @@ -1664,24 +1664,24 @@ define i32 @select_cst1(i1 zeroext %cond) { define i32 @select_cst2(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 10 -; RV32IM-NEXT: bnez a1, .LBB44_2 +; RV32IM-NEXT: bnez a0, .LBB44_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: lui a0, 5 ; RV32IM-NEXT: addi a0, a0, -480 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB44_2: +; RV32IM-NEXT: li a0, 10 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 10 -; RV64IM-NEXT: bnez a1, .LBB44_2 +; RV64IM-NEXT: bnez a0, .LBB44_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: lui a0, 5 ; RV64IM-NEXT: addiw a0, a0, -480 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB44_2: +; RV64IM-NEXT: li a0, 10 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst2: @@ -1782,24 +1782,24 @@ define i32 @select_cst4(i1 zeroext %cond) { define i32 @select_cst5(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst5: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 2047 -; RV32IM-NEXT: bnez a1, .LBB47_2 +; RV32IM-NEXT: bnez a0, .LBB47_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: lui a0, 1 ; RV32IM-NEXT: addi a0, a0, -2047 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB47_2: +; RV32IM-NEXT: li a0, 2047 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst5: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 2047 -; RV64IM-NEXT: bnez a1, .LBB47_2 +; RV64IM-NEXT: bnez a0, .LBB47_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: lui a0, 1 ; RV64IM-NEXT: addiw a0, a0, -2047 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB47_2: +; RV64IM-NEXT: li a0, 2047 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst5: @@ -1862,22 +1862,22 @@ define i32 @select_cst5_invert(i1 zeroext %cond) { define i32 @select_cst_diff2(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 120 -; RV32IM-NEXT: bnez a1, .LBB49_2 +; 
RV32IM-NEXT: bnez a0, .LBB49_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 122 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB49_2: +; RV32IM-NEXT: li a0, 120 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 120 -; RV64IM-NEXT: bnez a1, .LBB49_2 +; RV64IM-NEXT: bnez a0, .LBB49_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 122 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB49_2: +; RV64IM-NEXT: li a0, 120 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff2: @@ -1900,22 +1900,22 @@ define i32 @select_cst_diff2(i1 zeroext %cond) { define i32 @select_cst_diff2_invert(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff2_invert: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 122 -; RV32IM-NEXT: bnez a1, .LBB50_2 +; RV32IM-NEXT: bnez a0, .LBB50_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 120 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB50_2: +; RV32IM-NEXT: li a0, 122 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff2_invert: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 122 -; RV64IM-NEXT: bnez a1, .LBB50_2 +; RV64IM-NEXT: bnez a0, .LBB50_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 120 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB50_2: +; RV64IM-NEXT: li a0, 122 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff2_invert: @@ -1938,22 +1938,22 @@ define i32 @select_cst_diff2_invert(i1 zeroext %cond) { define i32 @select_cst_diff4(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff4: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 10 -; RV32IM-NEXT: bnez a1, .LBB51_2 +; RV32IM-NEXT: bnez a0, .LBB51_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 6 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB51_2: +; RV32IM-NEXT: li a0, 10 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff4: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 10 -; RV64IM-NEXT: bnez a1, .LBB51_2 +; RV64IM-NEXT: bnez a0, .LBB51_2 ; RV64IM-NEXT: # 
%bb.1: ; RV64IM-NEXT: li a0, 6 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB51_2: +; RV64IM-NEXT: li a0, 10 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff4: @@ -1976,22 +1976,22 @@ define i32 @select_cst_diff4(i1 zeroext %cond) { define i32 @select_cst_diff4_invert(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff4_invert: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 6 -; RV32IM-NEXT: bnez a1, .LBB52_2 +; RV32IM-NEXT: bnez a0, .LBB52_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 10 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB52_2: +; RV32IM-NEXT: li a0, 6 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff4_invert: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 6 -; RV64IM-NEXT: bnez a1, .LBB52_2 +; RV64IM-NEXT: bnez a0, .LBB52_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 10 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB52_2: +; RV64IM-NEXT: li a0, 6 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff4_invert: @@ -2014,22 +2014,22 @@ define i32 @select_cst_diff4_invert(i1 zeroext %cond) { define i32 @select_cst_diff8(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff8: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 14 -; RV32IM-NEXT: bnez a1, .LBB53_2 +; RV32IM-NEXT: bnez a0, .LBB53_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 6 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB53_2: +; RV32IM-NEXT: li a0, 14 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff8: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 14 -; RV64IM-NEXT: bnez a1, .LBB53_2 +; RV64IM-NEXT: bnez a0, .LBB53_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 6 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB53_2: +; RV64IM-NEXT: li a0, 14 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff8: @@ -2052,22 +2052,22 @@ define i32 @select_cst_diff8(i1 zeroext %cond) { define i32 @select_cst_diff8_invert(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff8_invert: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 
-; RV32IM-NEXT: li a0, 6 -; RV32IM-NEXT: bnez a1, .LBB54_2 +; RV32IM-NEXT: bnez a0, .LBB54_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 14 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB54_2: +; RV32IM-NEXT: li a0, 6 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff8_invert: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 6 -; RV64IM-NEXT: bnez a1, .LBB54_2 +; RV64IM-NEXT: bnez a0, .LBB54_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 14 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB54_2: +; RV64IM-NEXT: li a0, 6 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff8_invert: @@ -2091,22 +2091,22 @@ define i32 @select_cst_diff8_invert(i1 zeroext %cond) { define i32 @select_cst_diff1024(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff1024: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 1030 -; RV32IM-NEXT: bnez a1, .LBB55_2 +; RV32IM-NEXT: bnez a0, .LBB55_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 6 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB55_2: +; RV32IM-NEXT: li a0, 1030 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff1024: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 1030 -; RV64IM-NEXT: bnez a1, .LBB55_2 +; RV64IM-NEXT: bnez a0, .LBB55_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 6 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB55_2: +; RV64IM-NEXT: li a0, 1030 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff1024: @@ -2129,22 +2129,22 @@ define i32 @select_cst_diff1024(i1 zeroext %cond) { define i32 @select_cst_diff1024_invert(i1 zeroext %cond) { ; RV32IM-LABEL: select_cst_diff1024_invert: ; RV32IM: # %bb.0: -; RV32IM-NEXT: mv a1, a0 -; RV32IM-NEXT: li a0, 6 -; RV32IM-NEXT: bnez a1, .LBB56_2 +; RV32IM-NEXT: bnez a0, .LBB56_2 ; RV32IM-NEXT: # %bb.1: ; RV32IM-NEXT: li a0, 1030 +; RV32IM-NEXT: ret ; RV32IM-NEXT: .LBB56_2: +; RV32IM-NEXT: li a0, 6 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: select_cst_diff1024_invert: ; RV64IM: # %bb.0: -; RV64IM-NEXT: mv a1, a0 -; RV64IM-NEXT: li a0, 6 -; 
RV64IM-NEXT: bnez a1, .LBB56_2 +; RV64IM-NEXT: bnez a0, .LBB56_2 ; RV64IM-NEXT: # %bb.1: ; RV64IM-NEXT: li a0, 1030 +; RV64IM-NEXT: ret ; RV64IM-NEXT: .LBB56_2: +; RV64IM-NEXT: li a0, 6 ; RV64IM-NEXT: ret ; ; RV64IMXVTCONDOPS-LABEL: select_cst_diff1024_invert: diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index 11b0e5263e112..e0a16aa05cd00 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -1032,17 +1032,19 @@ bb7: ; preds = %bb2 define signext i32 @bug(i32 signext %x) { ; CHECK-LABEL: bug: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: beqz a0, .LBB18_4 +; CHECK-NEXT: beqz a0, .LBB18_5 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: srliw a2, a0, 16 -; CHECK-NEXT: seqz a1, a2 -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sllw a0, a0, a1 -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: beqz a2, .LBB18_3 +; CHECK-NEXT: srliw a1, a0, 16 +; CHECK-NEXT: seqz a2, a1 +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: sllw a0, a0, a2 +; CHECK-NEXT: beqz a1, .LBB18_3 ; CHECK-NEXT: # %bb.2: # %if.end ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB18_3: # %if.end +; CHECK-NEXT: j .LBB18_4 +; CHECK-NEXT: .LBB18_3: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB18_4: # %if.end ; CHECK-NEXT: srliw a2, a0, 24 ; CHECK-NEXT: seqz a2, a2 ; CHECK-NEXT: slli a3, a2, 3 @@ -1067,22 +1069,24 @@ define signext i32 @bug(i32 signext %x) { ; CHECK-NEXT: not a0, a0 ; CHECK-NEXT: srli a0, a0, 31 ; CHECK-NEXT: addw a0, a1, a0 -; CHECK-NEXT: .LBB18_4: # %cleanup +; CHECK-NEXT: .LBB18_5: # %cleanup ; CHECK-NEXT: ret ; ; NOREMOVAL-LABEL: bug: ; NOREMOVAL: # %bb.0: # %entry -; NOREMOVAL-NEXT: beqz a0, .LBB18_4 +; NOREMOVAL-NEXT: beqz a0, .LBB18_5 ; NOREMOVAL-NEXT: # %bb.1: # %if.end -; NOREMOVAL-NEXT: srliw a2, a0, 16 -; NOREMOVAL-NEXT: seqz a1, a2 -; NOREMOVAL-NEXT: slli a1, a1, 4 -; NOREMOVAL-NEXT: sllw a0, a0, a1 -; NOREMOVAL-NEXT: li a1, 16 -; NOREMOVAL-NEXT: beqz a2, .LBB18_3 +; NOREMOVAL-NEXT: srliw a1, a0, 16 +; 
NOREMOVAL-NEXT: seqz a2, a1 +; NOREMOVAL-NEXT: slli a2, a2, 4 +; NOREMOVAL-NEXT: sllw a0, a0, a2 +; NOREMOVAL-NEXT: beqz a1, .LBB18_3 ; NOREMOVAL-NEXT: # %bb.2: # %if.end ; NOREMOVAL-NEXT: li a1, 32 -; NOREMOVAL-NEXT: .LBB18_3: # %if.end +; NOREMOVAL-NEXT: j .LBB18_4 +; NOREMOVAL-NEXT: .LBB18_3: +; NOREMOVAL-NEXT: li a1, 16 +; NOREMOVAL-NEXT: .LBB18_4: # %if.end ; NOREMOVAL-NEXT: srliw a2, a0, 24 ; NOREMOVAL-NEXT: seqz a2, a2 ; NOREMOVAL-NEXT: slli a3, a2, 3 @@ -1107,7 +1111,7 @@ define signext i32 @bug(i32 signext %x) { ; NOREMOVAL-NEXT: not a0, a0 ; NOREMOVAL-NEXT: srli a0, a0, 31 ; NOREMOVAL-NEXT: addw a0, a1, a0 -; NOREMOVAL-NEXT: .LBB18_4: # %cleanup +; NOREMOVAL-NEXT: .LBB18_5: # %cleanup ; NOREMOVAL-NEXT: ret entry: %tobool.not = icmp eq i32 %x, 0 diff --git a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll index ec7e0ecce80ca..ae1aabed49805 100644 --- a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll +++ b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll @@ -7,13 +7,14 @@ define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) { ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: ori a0, a0, 1 ; CHECK-NEXT: slli a0, a0, 48 -; CHECK-NEXT: srli a1, a0, 48 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB0_2 +; CHECK-NEXT: srli a0, a0, 48 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bltu a1, a0, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: ret %add = add i16 %b, %a %or = or i16 %add, 1 @@ -28,13 +29,14 @@ define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) { ; CHECK-NEXT: subw a0, a0, a1 ; CHECK-NEXT: ori a0, a0, 1 ; CHECK-NEXT: slli a0, a0, 48 -; CHECK-NEXT: srli a1, a0, 48 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB1_2 +; CHECK-NEXT: srli a0, a0, 48 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bltu a1, a0, .LBB1_2 ; CHECK-NEXT: # 
%bb.1: ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: ret %add = sub i16 %a, %b %or = or i16 %add, 1 @@ -49,13 +51,14 @@ define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) { ; CHECK-NEXT: mul a0, a1, a0 ; CHECK-NEXT: ori a0, a0, 1 ; CHECK-NEXT: slli a0, a0, 48 -; CHECK-NEXT: srli a1, a0, 48 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB2_2 +; CHECK-NEXT: srli a0, a0, 48 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bltu a1, a0, .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: ret %add = mul i16 %b, %a %or = or i16 %add, 1 @@ -70,13 +73,14 @@ define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) { ; CHECK-NEXT: sll a0, a0, a1 ; CHECK-NEXT: ori a0, a0, 1 ; CHECK-NEXT: slli a0, a0, 48 -; CHECK-NEXT: srli a1, a0, 48 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB3_2 +; CHECK-NEXT: srli a0, a0, 48 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bltu a1, a0, .LBB3_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: ret %add = shl i16 %a, %b %or = or i16 %add, 1 @@ -89,12 +93,13 @@ define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %lim ; CHECK-LABEL: overflow_add_no_consts: ; CHECK: # %bb.0: ; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: andi a1, a0, 255 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: bltu a2, a1, .LBB4_2 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: bltu a2, a0, .LBB4_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %add = add i8 %b, %a %cmp = icmp ugt i8 %add, %limit @@ -106,13 +111,14 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: overflow_add_const_limit: ; CHECK: # %bb.0: ; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: 
andi a1, a0, 255 -; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: bltu a2, a1, .LBB5_2 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: bltu a1, a0, .LBB5_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %add = add i8 %b, %a %cmp = icmp ugt i8 %add, -128 @@ -124,13 +130,14 @@ define i32 @overflow_add_positive_const_limit(i8 zeroext %a) { ; CHECK-LABEL: overflow_add_positive_const_limit: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a0, a0, 56 -; CHECK-NEXT: srai a1, a0, 56 -; CHECK-NEXT: li a2, -1 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: blt a1, a2, .LBB6_2 +; CHECK-NEXT: srai a0, a0, 56 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: blt a0, a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, -1 %res = select i1 %cmp, i32 8, i32 16 @@ -140,13 +147,13 @@ define i32 @overflow_add_positive_const_limit(i8 zeroext %a) { define i32 @unsafe_add_underflow(i8 zeroext %a) { ; CHECK-LABEL: unsafe_add_underflow: ; CHECK: # %bb.0: -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: li a2, 1 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: beq a1, a2, .LBB7_2 +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: beq a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %cmp = icmp eq i8 %a, 1 %res = select i1 %cmp, i32 8, i32 16 @@ -156,12 +163,12 @@ define i32 @unsafe_add_underflow(i8 zeroext %a) { define i32 @safe_add_underflow(i8 zeroext %a) { ; CHECK-LABEL: safe_add_underflow: ; CHECK: # %bb.0: -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: beqz a1, .LBB8_2 +; CHECK-NEXT: beqz a0, .LBB8_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %cmp = icmp eq i8 %a, 0 %res = select i1 %cmp, i32 8, i32 16 @@ -171,13 
+178,14 @@ define i32 @safe_add_underflow(i8 zeroext %a) { define i32 @safe_add_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: safe_add_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -2 -; CHECK-NEXT: li a2, 251 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: bltu a1, a2, .LBB9_2 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: li a1, 251 +; CHECK-NEXT: bltu a0, a1, .LBB9_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %add = add i8 %a, -2 %cmp = icmp ult i8 %add, -5 @@ -189,13 +197,14 @@ define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { ; CHECK-LABEL: overflow_sub_negative_const_limit: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a0, a0, 56 -; CHECK-NEXT: srai a1, a0, 56 -; CHECK-NEXT: li a2, -1 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: blt a1, a2, .LBB10_2 +; CHECK-NEXT: srai a0, a0, 56 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: blt a0, a1, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, -1 %res = select i1 %cmp, i32 8, i32 16 @@ -206,13 +215,14 @@ define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { define i32 @sext_sub_underflow(i8 zeroext %a) { ; CHECK-LABEL: sext_sub_underflow: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: li a2, -6 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: bltu a2, a1, .LBB11_2 +; CHECK-NEXT: addi a0, a0, -6 +; CHECK-NEXT: li a1, -6 +; CHECK-NEXT: bltu a1, a0, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %sub = add i8 %a, -6 %cmp = icmp ugt i8 %sub, -6 @@ -223,12 +233,12 @@ define i32 @sext_sub_underflow(i8 zeroext %a) { define i32 @safe_sub_underflow(i8 zeroext %a) { ; CHECK-LABEL: safe_sub_underflow: ; CHECK: # %bb.0: -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: beqz a1, .LBB12_2 +; CHECK-NEXT: beqz a0, 
.LBB12_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: ret %cmp.not = icmp eq i8 %a, 0 %res = select i1 %cmp.not, i32 16, i32 8 @@ -238,13 +248,14 @@ define i32 @safe_sub_underflow(i8 zeroext %a) { define i32 @safe_sub_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: safe_sub_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -4 -; CHECK-NEXT: li a2, 250 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: bltu a2, a1, .LBB13_2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: li a1, 250 +; CHECK-NEXT: bltu a1, a0, .LBB13_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %sub = add i8 %a, -4 %cmp = icmp ugt i8 %sub, -6 @@ -256,13 +267,14 @@ define i32 @safe_sub_underflow_neg(i8 zeroext %a) { define i32 @sext_sub_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: sext_sub_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -4 -; CHECK-NEXT: li a2, -3 -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: bltu a1, a2, .LBB14_2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: li a1, -3 +; CHECK-NEXT: bltu a0, a1, .LBB14_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: ret %sub = add i8 %a, -4 %cmp = icmp ult i8 %sub, -3 diff --git a/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll index 348ca8e529621..3efa9e58e65d3 100644 --- a/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll @@ -737,3 +737,73 @@ define i64 @llround_f16(half %a) nounwind strictfp { %1 = call i64 @llvm.experimental.constrained.llround.i64.f16(half %a, metadata !"fpexcept.strict") strictfp ret i64 %1 } + +define half @ldexp_f16(half %x, i32 signext %y) nounwind { +; RV32IZFH-LABEL: ldexp_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; 
RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call ldexpf +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: ldexp_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call ldexpf +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: ldexp_f16: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h a0, a0 +; RV32IZHINX-NEXT: call ldexpf +; RV32IZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: ldexp_f16: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: addi sp, sp, -16 +; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINX-NEXT: fcvt.s.h a0, a0 +; RV64IZHINX-NEXT: call ldexpf +; RV64IZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINX-NEXT: addi sp, sp, 16 +; RV64IZHINX-NEXT: ret +; +; RV32IZDINXZHINX-LABEL: ldexp_f16: +; RV32IZDINXZHINX: # %bb.0: +; RV32IZDINXZHINX-NEXT: addi sp, sp, -16 +; RV32IZDINXZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZDINXZHINX-NEXT: fcvt.s.h a0, a0 +; RV32IZDINXZHINX-NEXT: call ldexpf +; RV32IZDINXZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZDINXZHINX-NEXT: addi sp, sp, 16 +; RV32IZDINXZHINX-NEXT: ret +; +; RV64IZDINXZHINX-LABEL: ldexp_f16: +; RV64IZDINXZHINX: # %bb.0: +; RV64IZDINXZHINX-NEXT: addi sp, sp, -16 +; RV64IZDINXZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZDINXZHINX-NEXT: fcvt.s.h a0, a0 
+; RV64IZDINXZHINX-NEXT: call ldexpf +; RV64IZDINXZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZDINXZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZDINXZHINX-NEXT: addi sp, sp, 16 +; RV64IZDINXZHINX-NEXT: ret + %z = call half @llvm.experimental.constrained.ldexp.f16.i32(half %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %z +} diff --git a/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll index 097d1e0f6ee55..214ea46d3130d 100644 --- a/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/zfhmin-half-intrinsics-strict.ll @@ -767,3 +767,73 @@ define i64 @llround_f16(half %a) nounwind strictfp { %1 = call i64 @llvm.experimental.constrained.llround.i64.f16(half %a, metadata !"fpexcept.strict") strictfp ret i64 %1 } + +define half @ldexp_f16(half %x, i32 signext %y) nounwind { +; RV32IZFHMIN-LABEL: ldexp_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call ldexpf +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: ldexp_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call ldexpf +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-STRICT-LABEL: ldexp_f16: +; RV32IZHINXMIN-STRICT: # %bb.0: +; RV32IZHINXMIN-STRICT-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-STRICT-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-STRICT-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-STRICT-NEXT: call ldexpf +; 
RV32IZHINXMIN-STRICT-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-STRICT-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-STRICT-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-STRICT-NEXT: ret +; +; RV64IZHINXMIN-STRICT-LABEL: ldexp_f16: +; RV64IZHINXMIN-STRICT: # %bb.0: +; RV64IZHINXMIN-STRICT-NEXT: addi sp, sp, -16 +; RV64IZHINXMIN-STRICT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINXMIN-STRICT-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-STRICT-NEXT: call ldexpf +; RV64IZHINXMIN-STRICT-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-STRICT-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINXMIN-STRICT-NEXT: addi sp, sp, 16 +; RV64IZHINXMIN-STRICT-NEXT: ret +; +; RV32IZDINXZHINXMIN-LABEL: ldexp_f16: +; RV32IZDINXZHINXMIN: # %bb.0: +; RV32IZDINXZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZDINXZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZDINXZHINXMIN-NEXT: call ldexpf +; RV32IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZDINXZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZDINXZHINXMIN-NEXT: ret +; +; RV64IZDINXZHINXMIN-LABEL: ldexp_f16: +; RV64IZDINXZHINXMIN: # %bb.0: +; RV64IZDINXZHINXMIN-NEXT: addi sp, sp, -16 +; RV64IZDINXZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZDINXZHINXMIN-NEXT: call ldexpf +; RV64IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZDINXZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZDINXZHINXMIN-NEXT: addi sp, sp, 16 +; RV64IZDINXZHINXMIN-NEXT: ret + %z = call half @llvm.experimental.constrained.ldexp.f16.i32(half %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %z +} diff --git a/llvm/test/CodeGen/X86/apx/imulzu.ll b/llvm/test/CodeGen/X86/apx/imulzu.ll new file mode 100644 index 0000000000000..9a4a63750a1db --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/imulzu.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mattr=+zu | FileCheck %s --check-prefixes=CHECK,ZU +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOZU + +; Test generation of 16b imulzu when -mattr=+zu is specified. +; The mulzu_* tests check for basic generation, which is limited to cases where a +; zero-extend of the result can be folded into imulzu. +; The remaining tests are modifications of selected test/CodeGen/X86/imul.ll tests with +; 16b multiplies, to check that common strength reductions in ISel are still performed +; when -mattr=+zu is in effect. +; +; FIXME: several cases from imul.ll covering DAG combines, in particular those using LEA, +; are not ported as X86's IsDesirableToPromoteOp has no way to accurately identify when +; promotion will permit a better sequence than an unpromoted imulzu. +; These cases should be added when they are implemented. + +define i32 @mulzu_16_32(i16 %A) { +; ZU-LABEL: mulzu_16_32: +; ZU: # %bb.0: +; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2 +; ZU-NEXT: retq +; +; NOZU-LABEL: mulzu_16_32: +; NOZU: # %bb.0: +; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2 +; NOZU-NEXT: movzwl %ax, %eax +; NOZU-NEXT: retq + %mul = mul i16 %A, 1234 + %r = zext i16 %mul to i32 + ret i32 %r +} + +define i64 @mulzu_16_64(i16 %A) { +; ZU-LABEL: mulzu_16_64: +; ZU: # %bb.0: +; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2 +; ZU-NEXT: retq +; +; NOZU-LABEL: mulzu_16_64: +; NOZU: # %bb.0: +; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2 +; NOZU-NEXT: movzwl %ax, %eax +; NOZU-NEXT: retq + %mul = mul i16 %A, 1234 + %r = zext i16 %mul to i64 + ret i64 %r +} + +define i32 @mulzu_16_32_mem(ptr %P) { +; ZU-LABEL: mulzu_16_32_mem: +; ZU: # %bb.0: +; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2 +; ZU-NEXT: retq +; +; NOZU-LABEL: mulzu_16_32_mem: +; NOZU: # %bb.0: +; NOZU-NEXT: movzwl (%rdi), %eax +; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2 +; NOZU-NEXT: movzwl %ax, 
%eax +; NOZU-NEXT: retq + %gep = getelementptr i16, ptr %P, i64 0 + %A = load i16, ptr %gep + %mul = mul i16 %A, 1234 + %r = zext i16 %mul to i32 + ret i32 %r +} + +define i64 @mulzu_16_64_mem(ptr %P) { +; ZU-LABEL: mulzu_16_64_mem: +; ZU: # %bb.0: +; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2 +; ZU-NEXT: retq +; +; NOZU-LABEL: mulzu_16_64_mem: +; NOZU: # %bb.0: +; NOZU-NEXT: movzwl (%rdi), %eax +; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2 +; NOZU-NEXT: movzwl %ax, %eax +; NOZU-NEXT: retq + %gep = getelementptr i16, ptr %P, i64 0 + %A = load i16, ptr %gep + %mul = mul i16 %A, 1234 + %r = zext i16 %mul to i64 + ret i64 %r +} + +; The following mulzu cases check that imulzu is not +; generated in the absence of a single zext user. The ZU/NOZU +; cases should match. + +define void @mulzu_16_store(i16 %A, ptr %R) { +; CHECK-LABEL: mulzu_16_store: +; CHECK: # %bb.0: +; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2 +; CHECK-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: retq + %gep = getelementptr i16, ptr %R, i64 0 + %mul = mul i16 %A, 1234 + store i16 %mul, ptr %gep + ret void +} + +define i32 @mulzu_16_store_32(i16 %A, ptr %R) { +; CHECK-LABEL: mulzu_16_store_32: +; CHECK: # %bb.0: +; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2 +; CHECK-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: movzwl %ax, %eax +; CHECK-NEXT: retq + %gep = getelementptr i16, ptr %R, i64 0 + %mul = mul i16 %A, 1234 + store i16 %mul, ptr %gep + %r = zext i16 %mul to i32 + ret i32 %r +} + +define i64 @mulzu_16_store_64(i16 %A, ptr %R) { +; CHECK-LABEL: mulzu_16_store_64: +; CHECK: # %bb.0: +; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2 +; CHECK-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: movzwl %ax, %eax +; CHECK-NEXT: retq + %gep = getelementptr i16, ptr %R, i64 0 + %mul = mul i16 %A, 1234 + store i16 %mul, ptr %gep + %r = zext i16 %mul to i64 + ret i64 %r +} + +define i32 @mulzu_sext_16_32(i16 %A) { +; CHECK-LABEL: mulzu_sext_16_32: +; CHECK: # %bb.0: +; CHECK-NEXT: imull $1234, %edi, %eax # 
imm = 0x4D2 +; CHECK-NEXT: cwtl +; CHECK-NEXT: retq + %mul = mul i16 %A, 1234 + %r = sext i16 %mul to i32 + ret i32 %r +} + +define i64 @mulzu_sext_16_64(i16 %A) { +; CHECK-LABEL: mulzu_sext_16_64: +; CHECK: # %bb.0: +; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2 +; CHECK-NEXT: movswq %ax, %rax +; CHECK-NEXT: retq + %mul = mul i16 %A, 1234 + %r = sext i16 %mul to i64 + ret i64 %r +} + +; Tests ported from test/CodeGen/X86/imul.ll follow from this point. +; The generated code, which strength-reduces multiplies by certain +; constants, should be unaffected by enabling zu. + +define i16 @mul4_16(i16 %A) { +; +; CHECK-LABEL: mul4_16: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal (,%rdi,4), %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq + %mul = mul i16 %A, 4 + ret i16 %mul +} + +define i16 @mul4096_16(i16 %A) { +; +; CHECK-LABEL: mul4096_16: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $12, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq + %mul = mul i16 %A, 4096 + ret i16 %mul +} + +define i16 @mulmin4096_16(i16 %A) { +; +; CHECK-LABEL: mulmin4096_16: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $12, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq + %mul = mul i16 %A, -4096 + ret i16 %mul +} + +define i16 @mul4_16_minsize(i16 %A) minsize { +; +; CHECK-LABEL: mul4_16_minsize: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal (,%rdi,4), %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq + %mul = mul i16 %A, 4 + ret i16 %mul +} + +define i16 @mul0_16(i16 %A) { +; +; CHECK-LABEL: mul0_16: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq + %mul = mul i16 %A, 0 + ret i16 %mul +} + +define i16 @mul4294967295_16(i16 %A) { +; +; CHECK-LABEL: mul4294967295_16: +; CHECK: # 
%bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq + %mul = mul i16 %A, 4294967295 + ret i16 %mul +} diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index f2a197cca8ae5..1c4bfa8422d81 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -2159,30 +2159,11 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind { ; KNL-LABEL: test_concat_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; KNL-NEXT: vcvtph2ps %xmm0, %xmm1 -; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0] -; KNL-NEXT: vucomiss %xmm2, %xmm1 -; KNL-NEXT: setb %al -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 -; KNL-NEXT: vucomiss %xmm2, %xmm0 -; KNL-NEXT: setb %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 -; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vucomiss %xmm2, %xmm1 -; KNL-NEXT: seta %al -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vucomiss %xmm2, %xmm0 -; KNL-NEXT: seta %al -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; KNL-NEXT: vcvtph2ps %xmm0, %ymm0 +; KNL-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0 +; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1 ; KNL-NEXT: kandw %k1, %k0, %k1 ; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} @@ -2194,36 +2175,16 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind { ; ; SKX-LABEL: test_concat_v2i1: ; SKX: ## 
%bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7] -; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 -; SKX-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0] -; SKX-NEXT: vucomiss %xmm2, %xmm1 -; SKX-NEXT: setb %al -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: kshiftlb $1, %k0, %k0 -; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 -; SKX-NEXT: vucomiss %xmm2, %xmm0 -; SKX-NEXT: setb %al -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $7, %k1, %k1 -; SKX-NEXT: korw %k0, %k1, %k0 -; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; SKX-NEXT: vucomiss %xmm2, %xmm1 -; SKX-NEXT: seta %al -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftlb $1, %k1, %k1 -; SKX-NEXT: vucomiss %xmm2, %xmm0 -; SKX-NEXT: seta %al -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kshiftlb $7, %k2, %k2 -; SKX-NEXT: kshiftrb $7, %k2, %k2 -; SKX-NEXT: korw %k1, %k2, %k1 +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; SKX-NEXT: vcvtph2ps %xmm0, %ymm0 +; SKX-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k1 ; SKX-NEXT: kandw %k1, %k0, %k1 ; SKX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; SKX-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z} ; SKX-NEXT: vmovd %xmm0, (%rdx) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %tmp = load <2 x half>, ptr %arg, align 8 %tmp3 = fcmp fast olt <2 x half> %tmp, diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index ffbeeb19a4aeb..5078130f18077 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -2540,9 +2540,9 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask3(ptr %vp, <4 x i64> define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mem_mask4: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [0,4,6,1] -; CHECK-NEXT: vpermi2q 32(%rdi), %ymm2, %ymm3 +; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2 +; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [4,0,2,5] +; CHECK-NEXT: vpermi2q (%rdi), %ymm2, %ymm3 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 ; CHECK-NEXT: vmovdqa64 %ymm3, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -2556,10 +2556,10 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> % define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %mask) { ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,4,6,1] +; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2 +; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,0,2,5] ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 -; CHECK-NEXT: vpermi2q 32(%rdi), %ymm2, %ymm1 {%k1} {z} +; CHECK-NEXT: vpermi2q (%rdi), %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %vec = load <8 x i64>, ptr %vp @@ -3514,12 +3514,11 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask2(ptr %vp, <8 x ; CHECK-FAST-PERLANE-LABEL: test_masked_16xfloat_to_8xfloat_perm_mem_mask2: ; CHECK-FAST-PERLANE: # %bb.0: ; CHECK-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm2 -; CHECK-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3 -; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm4 = [9,5,2,3,2,8,8,1] -; CHECK-FAST-PERLANE-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4 +; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm3 = [1,13,10,11,10,0,0,9] +; CHECK-FAST-PERLANE-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm3 ; CHECK-FAST-PERLANE-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm2, %ymm1, %k1 -; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm4, %ymm0 {%k1} +; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm3, %ymm0 {%k1} ; CHECK-FAST-PERLANE-NEXT: retq %vec = load <16 x float>, ptr %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <8 x 
i32> @@ -3542,11 +3541,10 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2(ptr %vp, <8 ; CHECK-FAST-PERLANE-LABEL: test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2: ; CHECK-FAST-PERLANE: # %bb.0: ; CHECK-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm2 -; CHECK-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3 -; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = [9,5,2,3,2,8,8,1] -; CHECK-FAST-PERLANE-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm4, %ymm0, %k1 -; CHECK-FAST-PERLANE-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z} +; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = [1,13,10,11,10,0,0,9] +; CHECK-FAST-PERLANE-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm3, %ymm0, %k1 +; CHECK-FAST-PERLANE-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm1 {%k1} {z} ; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-FAST-PERLANE-NEXT: retq %vec = load <16 x float>, ptr %vp @@ -4398,9 +4396,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask0(ptr %vp, define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-FAST-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask1: ; CHECK-FAST: # %bb.0: -; CHECK-FAST-NEXT: vmovapd (%rdi), %ymm2 -; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,6] -; CHECK-FAST-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm3 +; CHECK-FAST-NEXT: vbroadcastsd 32(%rdi), %ymm2 +; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm3 = [7,0,6,2] +; CHECK-FAST-NEXT: vpermi2pd (%rdi), %ymm2, %ymm3 ; CHECK-FAST-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-FAST-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-FAST-NEXT: vmovapd %ymm3, %ymm0 {%k1} @@ -4423,11 +4421,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 x double> %mask) { ; CHECK-FAST-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1: ; CHECK-FAST: # %bb.0: 
-; CHECK-FAST-NEXT: vmovapd (%rdi), %ymm2 -; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,6] +; CHECK-FAST-NEXT: vbroadcastsd 32(%rdi), %ymm2 +; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [7,0,6,2] ; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm0, %k1 -; CHECK-FAST-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm1 {%k1} {z} +; CHECK-FAST-NEXT: vpermi2pd (%rdi), %ymm2, %ymm1 {%k1} {z} ; CHECK-FAST-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-FAST-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index 832e55a835525..24eb9b3715ed6 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1441,88 +1441,44 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32> define void @half_vec_compare(ptr %x, ptr %y) { ; KNL-LABEL: half_vec_compare: ; KNL: ## %bb.0: ## %entry -; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07] -; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55] -; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7] -; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9] -; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2] -; KNL-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca] -; KNL-NEXT: movl $65535, %ecx ## encoding: [0xb9,0xff,0xff,0x00,0x00] -; KNL-NEXT: ## imm = 0xFFFF -; KNL-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00] -; KNL-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1] -; KNL-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1] -; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] -; KNL-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2] -; KNL-NEXT: cmovnel %ecx, %eax ## encoding: 
[0x0f,0x45,0xc1] -; KNL-NEXT: cmovpl %ecx, %eax ## encoding: [0x0f,0x4a,0xc1] -; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; KNL-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc2,0x01] -; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0] +; KNL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] +; KNL-NEXT: vcvtph2ps %xmm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x13,0xc0] +; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9] +; KNL-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc1,0x04] +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0] ; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A] ; KNL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; KNL-NEXT: vpextrw $0, %xmm0, (%rsi) ## encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00] +; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; KNL-NEXT: retq ## encoding: [0xc3] ; ; AVX512BW-LABEL: half_vec_compare: ; AVX512BW: ## %bb.0: ## %entry -; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07] -; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55] -; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7] -; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9] -; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2] -; AVX512BW-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca] -; AVX512BW-NEXT: movl $65535, %ecx ## encoding: [0xb9,0xff,0xff,0x00,0x00] -; AVX512BW-NEXT: ## imm = 0xFFFF -; AVX512BW-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00] -; 
AVX512BW-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1] -; AVX512BW-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1] -; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] -; AVX512BW-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2] -; AVX512BW-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1] -; AVX512BW-NEXT: cmovpl %ecx, %eax ## encoding: [0x0f,0x4a,0xc1] -; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x01] -; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0] +; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] +; AVX512BW-NEXT: vcvtph2ps %xmm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x13,0xc0] +; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9] +; AVX512BW-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc1,0x04] +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0] ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A] ; AVX512BW-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00] +; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; AVX512BW-NEXT: retq ## encoding: [0xc3] ; ; SKX-LABEL: half_vec_compare: ; SKX: ## %bb.0: ## %entry -; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07] -; SKX-NEXT: vpshuflw $85, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc8,0x55] -; SKX-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7] -; SKX-NEXT: vcvtph2ps %xmm1, 
%xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc9] -; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2] -; SKX-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca] -; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] -; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] -; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] -; SKX-NEXT: testb %cl, %cl ## encoding: [0x84,0xc9] -; SKX-NEXT: setne %al ## encoding: [0x0f,0x95,0xc0] -; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] -; SKX-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2] -; SKX-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1] -; SKX-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2] -; SKX-NEXT: orb %cl, %dl ## encoding: [0x08,0xca] -; SKX-NEXT: testb %dl, %dl ## encoding: [0x84,0xd2] -; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] -; SKX-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01] -; SKX-NEXT: kmovw %ecx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc1] -; SKX-NEXT: kmovd %eax, %k1 ## encoding: [0xc5,0xfb,0x92,0xc8] -; SKX-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01] -; SKX-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] +; SKX-NEXT: vcvtph2ps %xmm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9] +; SKX-NEXT: vcmpneqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x04] ; SKX-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; SKX-NEXT: ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x05,A,A,A,A] ; SKX-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; SKX-NEXT: vpextrw $0, 
%xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00] +; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; SKX-NEXT: retq ## encoding: [0xc3] entry: %0 = load <2 x half>, ptr %x diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index 41d9a867c0a96..07701f082b0e2 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -1641,188 +1641,26 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) { ; ; AVX512-LABEL: test_fmaximum_v4f16: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 16 -; AVX512-NEXT: pushq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 24 -; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 32 -; AVX512-NEXT: pushq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 40 -; AVX512-NEXT: pushq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 48 -; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 56 -; AVX512-NEXT: .cfi_offset %rbx, -56 -; AVX512-NEXT: .cfi_offset %r12, -48 -; AVX512-NEXT: .cfi_offset %r13, -40 -; AVX512-NEXT: .cfi_offset %r14, -32 -; AVX512-NEXT: .cfi_offset %r15, -24 -; AVX512-NEXT: .cfi_offset %rbp, -16 -; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vucomiss %xmm2, %xmm3 -; AVX512-NEXT: movl $65535, %ecx # imm = 0xFFFF -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovpl %ecx, %edx -; AVX512-NEXT: movl $0, %edi -; AVX512-NEXT: cmoval %ecx, %edi -; AVX512-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vucomiss %xmm2, 
%xmm3 -; AVX512-NEXT: movl $0, %esi -; AVX512-NEXT: cmovpl %ecx, %esi -; AVX512-NEXT: movl $0, %r9d -; AVX512-NEXT: cmoval %ecx, %r9d -; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vucomiss %xmm2, %xmm3 -; AVX512-NEXT: movl $0, %r8d -; AVX512-NEXT: cmovpl %ecx, %r8d -; AVX512-NEXT: movl $0, %r11d -; AVX512-NEXT: cmoval %ecx, %r11d -; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7] -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7] -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vucomiss %xmm2, %xmm3 -; AVX512-NEXT: movl $0, %r10d -; AVX512-NEXT: cmovpl %ecx, %r10d -; AVX512-NEXT: movl $0, %ebp -; AVX512-NEXT: cmoval %ecx, %ebp -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vucomiss %xmm2, %xmm3 -; AVX512-NEXT: movl $0, %ebx -; AVX512-NEXT: cmovpl %ecx, %ebx -; AVX512-NEXT: movl $0, %r14d -; AVX512-NEXT: cmoval %ecx, %r14d -; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7] -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7] -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vucomiss %xmm2, %xmm3 -; AVX512-NEXT: movl $0, %r15d -; AVX512-NEXT: cmovpl %ecx, %r15d -; AVX512-NEXT: movl $0, %r12d -; AVX512-NEXT: cmoval %ecx, %r12d -; AVX512-NEXT: vcvtph2ps %xmm1, %xmm2 -; AVX512-NEXT: vcvtph2ps %xmm0, %xmm3 -; AVX512-NEXT: vucomiss %xmm2, %xmm3 -; AVX512-NEXT: movl $0, %r13d -; AVX512-NEXT: cmoval %ecx, %r13d -; AVX512-NEXT: vmovd %r13d, %xmm2 -; AVX512-NEXT: vpinsrw $1, %r12d, %xmm2, %xmm2 -; AVX512-NEXT: vpinsrw $2, %r14d, %xmm2, %xmm2 -; AVX512-NEXT: vpinsrw $3, %ebp, %xmm2, %xmm2 -; AVX512-NEXT: vpinsrw $4, %r11d, %xmm2, %xmm2 -; 
AVX512-NEXT: vpinsrw $5, %r9d, %xmm2, %xmm2 -; AVX512-NEXT: vpinsrw $6, %edi, %xmm2, %xmm2 -; AVX512-NEXT: movl $0, %edi -; AVX512-NEXT: cmovpl %ecx, %edi -; AVX512-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vcvtph2ps %xmm4, %xmm4 -; AVX512-NEXT: vucomiss %xmm3, %xmm4 -; AVX512-NEXT: movl $0, %r9d -; AVX512-NEXT: cmoval %ecx, %r9d -; AVX512-NEXT: vpinsrw $7, %r9d, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm2 -; AVX512-NEXT: vmovd %edi, %xmm3 -; AVX512-NEXT: vpinsrw $1, %r15d, %xmm3, %xmm3 -; AVX512-NEXT: vpinsrw $2, %ebx, %xmm3, %xmm3 -; AVX512-NEXT: vpinsrw $3, %r10d, %xmm3, %xmm3 -; AVX512-NEXT: vpinsrw $4, %r8d, %xmm3, %xmm3 -; AVX512-NEXT: vpinsrw $5, %esi, %xmm3, %xmm3 -; AVX512-NEXT: vpinsrw $6, %edx, %xmm3, %xmm3 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovpl %ecx, %edx -; AVX512-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; AVX512-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 -; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7] -; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3 -; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512-NEXT: vucomiss %xmm4, %xmm3 -; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF -; AVX512-NEXT: cmovnel %eax, %edx -; AVX512-NEXT: cmovpl %eax, %edx -; AVX512-NEXT: vcvtph2ps %xmm2, %xmm3 -; AVX512-NEXT: vucomiss %xmm4, %xmm3 -; AVX512-NEXT: movl $65535, %esi # imm = 0xFFFF -; AVX512-NEXT: cmovnel %eax, %esi -; AVX512-NEXT: cmovpl %eax, %esi -; AVX512-NEXT: vmovd %esi, %xmm3 -; AVX512-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,1,1] -; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5 -; AVX512-NEXT: vucomiss %xmm4, %xmm5 -; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF -; 
AVX512-NEXT: cmovnel %eax, %edx -; AVX512-NEXT: cmovpl %eax, %edx -; AVX512-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[3,3,3,3,4,5,6,7] -; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5 -; AVX512-NEXT: vucomiss %xmm4, %xmm5 -; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF -; AVX512-NEXT: cmovnel %eax, %edx -; AVX512-NEXT: cmovpl %eax, %edx -; AVX512-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] -; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5 -; AVX512-NEXT: vucomiss %xmm4, %xmm5 -; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF -; AVX512-NEXT: cmovnel %eax, %edx -; AVX512-NEXT: cmovpl %eax, %edx -; AVX512-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5 -; AVX512-NEXT: vucomiss %xmm4, %xmm5 -; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF -; AVX512-NEXT: cmovnel %eax, %edx -; AVX512-NEXT: cmovpl %eax, %edx -; AVX512-NEXT: vpinsrw $5, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[3,3,3,3] -; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5 -; AVX512-NEXT: vucomiss %xmm4, %xmm5 -; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF -; AVX512-NEXT: cmovnel %eax, %edx -; AVX512-NEXT: cmovpl %eax, %edx -; AVX512-NEXT: vpinsrw $6, %edx, %xmm3, %xmm3 -; AVX512-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5 -; AVX512-NEXT: vucomiss %xmm4, %xmm5 -; AVX512-NEXT: cmovnel %eax, %ecx -; AVX512-NEXT: cmovpl %eax, %ecx -; AVX512-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm3 -; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm5 -; AVX512-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm4 -; AVX512-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm3, %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: popq %rbx 
-; AVX512-NEXT: .cfi_def_cfa_offset 48 -; AVX512-NEXT: popq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 40 -; AVX512-NEXT: popq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 32 -; AVX512-NEXT: popq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 24 -; AVX512-NEXT: popq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 16 -; AVX512-NEXT: popq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 8 +; AVX512-NEXT: vcvtph2ps %xmm0, %ymm2 +; AVX512-NEXT: vcvtph2ps %xmm1, %ymm3 +; AVX512-NEXT: vcmpltps %ymm2, %ymm3, %ymm4 +; AVX512-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm4 +; AVX512-NEXT: vcmpunordps %ymm3, %ymm2, %ymm2 +; AVX512-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; AVX512-NEXT: vpblendvb %xmm2, %xmm3, %xmm4, %xmm2 +; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4 +; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm3 +; AVX512-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vcvtph2ps %xmm2, %ymm1 +; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512-NEXT: vcmpeqps %ymm3, %ymm1, %ymm1 +; AVX512-NEXT: vpmovdw %zmm1, %ymm1 +; AVX512-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; ; X86-LABEL: test_fmaximum_v4f16: diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 9f01d07e6a670..033cadae6a1e7 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -1166,15 +1166,15 @@ define void @main.45() #0 { ; ; BWON-F16C-LABEL: main.45: ; BWON-F16C: # %bb.0: # %entry -; BWON-F16C-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] -; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; BWON-F16C-NEXT: xorl %eax, %eax -; BWON-F16C-NEXT: vucomiss %xmm0, %xmm0 -; BWON-F16C-NEXT: movl $65535, %ecx # imm = 0xFFFF -; BWON-F16C-NEXT: cmovnpl %eax, %ecx -; BWON-F16C-NEXT: vmovd %ecx, 
%xmm0 +; BWON-F16C-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm0 +; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; BWON-F16C-NEXT: vmovd %eax, %xmm1 +; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; BWON-F16C-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; BWON-F16C-NEXT: vcmpunordps %xmm2, %xmm0, %xmm0 +; BWON-F16C-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; BWON-F16C-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 ; BWON-F16C-NEXT: vmovq %xmm0, (%rax) ; BWON-F16C-NEXT: retq diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll index f371ec10fe25f..6e41e1bb87eb2 100644 --- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll +++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll @@ -447,9 +447,8 @@ define <8 x i64> @elt5_v8i64(i64 %x) { ; X64-AVX512F-LABEL: elt5_v8i64: ; X64-AVX512F: # %bb.0: ; X64-AVX512F-NEXT: vmovq %rdi, %xmm1 -; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,2,3,4,8,6,7] -; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [42,1,2,3,4,0,6,7] -; X64-AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0 +; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,10,11,12,0,14,15] +; X64-AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 ; X64-AVX512F-NEXT: retq %ins = insertelement <8 x i64> , i64 %x, i32 5 ret <8 x i64> %ins diff --git a/llvm/test/CodeGen/X86/pr114520.ll b/llvm/test/CodeGen/X86/pr114520.ll index 660b169e302d8..c557da6b3ab8c 100644 --- a/llvm/test/CodeGen/X86/pr114520.ll +++ b/llvm/test/CodeGen/X86/pr114520.ll @@ -21,83 +21,8 @@ entry: define <8 x half> @test2(<8 x half> %x) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vcvtph2ps %xmm0, %xmm2 -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0] -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: kmovw %eax, %k0 -; 
CHECK-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7] -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $14, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-5, %ax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $13, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-9, %ax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7] -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $12, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-17, %ax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $11, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-33, %ax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $10, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-65, %ax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: 
kandw %k1, %k0, %k0 -; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3] -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $6, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: kshiftlw $9, %k0, %k0 -; CHECK-NEXT: kshiftrw $9, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 -; CHECK-NEXT: seta %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kshiftlw $7, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k1 +; CHECK-NEXT: vcvtph2ps %xmm0, %ymm1 +; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %k1 ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf] ; CHECK-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z} diff --git a/llvm/test/CodeGen/X86/pr57340.ll b/llvm/test/CodeGen/X86/pr57340.ll index 00a52c639e43c..6bebbe3cee1f9 100644 --- a/llvm/test/CodeGen/X86/pr57340.ll +++ b/llvm/test/CodeGen/X86/pr57340.ll @@ -4,236 +4,13 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-LABEL: main.41: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpbroadcastw (%rax), %xmm0 -; CHECK-NEXT: vpextrw $0, %xmm0, %eax -; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; CHECK-NEXT: vmovdqu (%rax), %ymm3 +; CHECK-NEXT: vpbroadcastw (%rax), %ymm0 +; CHECK-NEXT: vmovdqu (%rax), %ymm1 ; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm2 = [31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] -; CHECK-NEXT: vpermi2w %ymm1, %ymm3, %ymm2 -; CHECK-NEXT: vprold $16, %xmm2, %xmm1 -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm3 -; CHECK-NEXT: vmovdqu (%rax), %xmm5 -; CHECK-NEXT: vprold $16, %xmm5, %xmm1 -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm3, %xmm1 -; CHECK-NEXT: setnp %cl -; CHECK-NEXT: sete %dl -; CHECK-NEXT: testb %cl, %dl -; CHECK-NEXT: setne %cl -; CHECK-NEXT: 
kmovd %ecx, %k0 -; CHECK-NEXT: kshiftlw $15, %k0, %k0 -; CHECK-NEXT: vmovd %eax, %xmm3 -; CHECK-NEXT: vcvtph2ps %xmm3, %xmm3 -; CHECK-NEXT: vcvtph2ps %xmm5, %xmm6 -; CHECK-NEXT: kshiftrw $14, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm3, %xmm6 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: korw %k0, %k1, %k0 -; CHECK-NEXT: movw $-5, %ax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] -; CHECK-NEXT: vcvtph2ps %xmm3, %xmm3 -; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-NEXT: vucomiss %xmm3, %xmm0 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $13, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-9, %ax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vprolq $16, %xmm2, %xmm3 -; CHECK-NEXT: vcvtph2ps %xmm3, %xmm4 -; CHECK-NEXT: vprolq $16, %xmm5, %xmm3 -; CHECK-NEXT: vcvtph2ps %xmm3, %xmm3 -; CHECK-NEXT: vucomiss %xmm4, %xmm3 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $12, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-17, %ax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] -; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4 -; CHECK-NEXT: vucomiss %xmm4, %xmm0 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $11, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-33, %ax -; CHECK-NEXT: kmovd 
%eax, %k1 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm4, %xmm7 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm5[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm7, %xmm4 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $10, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-65, %ax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vshufps {{.*#+}} xmm7 = xmm2[3,3,3,3] -; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7 -; CHECK-NEXT: vucomiss %xmm7, %xmm0 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $9, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-129, %ax -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5 -; CHECK-NEXT: vucomiss %xmm7, %xmm5 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $8, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-257, %ax # imm = 0xFEFF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm2 -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm7 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; 
CHECK-NEXT: vucomiss %xmm7, %xmm6 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $7, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-513, %ax # imm = 0xFDFF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vprold $16, %xmm2, %xmm6 -; CHECK-NEXT: vcvtph2ps %xmm6, %xmm6 -; CHECK-NEXT: vucomiss %xmm6, %xmm1 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $6, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-1025, %ax # imm = 0xFBFF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $5, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-2049, %ax # imm = 0xF7FF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vprolq $16, %xmm2, %xmm1 -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm3 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $4, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-4097, %ax # imm = 0xEFFF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; 
CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $3, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-8193, %ax # imm = 0xDFFF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm4 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: kshiftrw $2, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: movw $-16385, %ax # imm = 0xBFFF -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm2[3,3,3,3] -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $14, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: kshiftlw $1, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm0, %xmm5 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlw $15, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k1 +; CHECK-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 +; CHECK-NEXT: vcvtph2ps %ymm2, %zmm0 +; CHECK-NEXT: vcvtph2ps %ymm1, %zmm1 +; CHECK-NEXT: vcmpeqps %zmm0, %zmm1, %k1 ; CHECK-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; CHECK-NEXT: vmovdqa %xmm0, (%rax) ; CHECK-NEXT: vzeroupper diff --git 
a/llvm/test/CodeGen/X86/tailcall-nofpclass.ll b/llvm/test/CodeGen/X86/tailcall-nofpclass.ll new file mode 100644 index 0000000000000..fd085bb1244fb --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcall-nofpclass.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s + +; Ensures that tail call optimization can still be +; performed when nofpclass is used. + +define noundef nofpclass(nan inf) float @_Z3foof(float noundef nofpclass(nan inf) %0) { +; CHECK-LABEL: _Z3foof: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp expf@PLT # TAILCALL + %2 = tail call float @llvm.exp.f32(float %0) + ret float %2 +} diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 21dfdc3c2abe4..49062eaef3188 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -3011,25 +3011,26 @@ entry: ret <4 x double> %log2 } -define <1 x float> @constrained_vector_rint_v1f32() #0 { -; CHECK-LABEL: constrained_vector_rint_v1f32: +define <1 x float> @constrained_vector_rint_v1f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_rint_v1f32_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: callq rintf@PLT ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_rint_v1f32: +; AVX-LABEL: constrained_vector_rint_v1f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq entry: + %b = load <1 x float>, ptr %a %rint = call <1 x float> 
@llvm.experimental.constrained.rint.v1f32( - <1 x float> , + <1 x float> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret <1 x float> %rint @@ -3063,42 +3064,77 @@ entry: ret <2 x double> %rint } -define <3 x float> @constrained_vector_rint_v3f32() #0 { -; CHECK-LABEL: constrained_vector_rint_v3f32: +define <2 x double> @constrained_vector_rint_v2f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_rint_v2f64_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_rint_v2f64_var: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vroundpd $4, (%rdi), %xmm0 +; AVX-NEXT: retq +entry: + %b = load <2 x double>, ptr %a + %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( + <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %rint +} + +define <3 x float> @constrained_vector_rint_v3f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_rint_v3f32_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq rintf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movss 
{{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: callq rintf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq rintf@PLT -; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_rint_v3f32: +; AVX-LABEL: constrained_vector_rint_v3f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX-NEXT: retq entry: + %b = load <3 x float>, ptr %a %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32( - <3 x float> , + 
<3 x float> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret <3 x float> %rint @@ -3143,6 +3179,51 @@ entry: ret <3 x double> %rint } +define <3 x double> @constrained_vector_rint_v3f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_rint_v3f64_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_rint_v3f64_var: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vroundpd $4, (%rdi), %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +entry: + %b = load <3 x double>, ptr %a + %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64( + <3 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %rint +} + define <4 x double> @constrained_vector_rint_v4f64() #0 { ; CHECK-LABEL: constrained_vector_rint_v4f64: ; CHECK: # %bb.0: # 
%entry @@ -3182,25 +3263,70 @@ entry: ret <4 x double> %rint } -define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { -; CHECK-LABEL: constrained_vector_nearbyint_v1f32: +define <4 x double> @constrained_vector_rint_v4f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_rint_v4f64_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm1 +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 16(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq rint@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_rint_v4f64_var: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vroundpd $4, (%rdi), %ymm0 +; AVX-NEXT: retq +entry: + %b = load <4 x double>, ptr %a + %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64( + <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %rint +} + +define <1 x 
float> @constrained_vector_nearbyint_v1f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_nearbyint_v1f32_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: callq nearbyintf@PLT ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_nearbyint_v1f32: +; AVX-LABEL: constrained_vector_nearbyint_v1f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq entry: + %b = load <1 x float>, ptr %a %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32( - <1 x float> , + <1 x float> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret <1 x float> %nearby @@ -3234,42 +3360,77 @@ entry: ret <2 x double> %nearby } -define <3 x float> @constrained_vector_nearbyint_v3f32() #0 { -; CHECK-LABEL: constrained_vector_nearbyint_v3f32: +define <2 x double> @constrained_vector_nearbyint_v2f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_nearbyint_v2f64_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: 
.cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_nearbyint_v2f64_var: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vroundpd $12, (%rdi), %xmm0 +; AVX-NEXT: retq +entry: + %b = load <2 x double>, ptr %a + %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( + <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %nearby +} + +define <3 x float> @constrained_vector_nearbyint_v3f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_nearbyint_v3f32_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq nearbyintf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: callq nearbyintf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq nearbyintf@PLT -; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; 
CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_nearbyint_v3f32: +; AVX-LABEL: constrained_vector_nearbyint_v3f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX-NEXT: retq entry: + %b = load <3 x float>, ptr %a %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32( - <3 x float> , + <3 x float> %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret <3 x float> %nearby @@ -3314,6 +3475,51 @@ entry: ret <3 x double> %nearby } +define <3 x double> @constrained_vector_nearbyint_v3f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_nearbyint_v3f64_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; 
CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_nearbyint_v3f64_var: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vroundpd $12, (%rdi), %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +entry: + %b = load <3 x double>, ptr %a + %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( + <3 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %nearby +} + define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; CHECK-LABEL: constrained_vector_nearbyint_v4f64: ; CHECK: # %bb.0: # %entry @@ -3353,6 +3559,50 @@ entry: ret <4 x double> %nearby } +define <4 x double> @constrained_vector_nearbyint_v4f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_nearbyint_v4f64_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm1 +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 16(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 
# 16-byte Reload +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq nearbyint@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_nearbyint_v4f64_var: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vroundpd $12, (%rdi), %ymm0 +; AVX-NEXT: retq +entry: + %b = load <4 x double>, ptr %a + %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( + <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %nearby +} + define <1 x float> @constrained_vector_maxnum_v1f32() #0 { ; CHECK-LABEL: constrained_vector_maxnum_v1f32: ; CHECK: # %bb.0: # %entry @@ -4482,10 +4732,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 { ; CHECK-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm0, %xmm2 ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: ja .LBB115_2 +; CHECK-NEXT: ja .LBB121_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movaps %xmm2, %xmm1 -; CHECK-NEXT: .LBB115_2: # %entry +; CHECK-NEXT: .LBB121_2: # %entry ; CHECK-NEXT: subss %xmm1, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %rcx ; CHECK-NEXT: setbe %al @@ -4500,10 +4750,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 { ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm0, %xmm1 ; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: ja .LBB115_2 +; AVX1-NEXT: ja .LBB121_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: .LBB115_2: # %entry +; AVX1-NEXT: .LBB121_2: # %entry ; AVX1-NEXT: vsubss %xmm2, 
%xmm0, %xmm0 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx ; AVX1-NEXT: setbe %al @@ -4531,10 +4781,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { ; CHECK-NEXT: comiss %xmm2, %xmm1 ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: xorps %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB116_2 +; CHECK-NEXT: ja .LBB122_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movaps %xmm1, %xmm3 -; CHECK-NEXT: .LBB116_2: # %entry +; CHECK-NEXT: .LBB122_2: # %entry ; CHECK-NEXT: subss %xmm3, %xmm2 ; CHECK-NEXT: cvttss2si %xmm2, %rax ; CHECK-NEXT: setbe %cl @@ -4544,10 +4794,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { ; CHECK-NEXT: movq %rcx, %xmm2 ; CHECK-NEXT: movss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm3, %xmm1 -; CHECK-NEXT: ja .LBB116_4 +; CHECK-NEXT: ja .LBB122_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: .LBB116_4: # %entry +; CHECK-NEXT: .LBB122_4: # %entry ; CHECK-NEXT: subss %xmm0, %xmm3 ; CHECK-NEXT: cvttss2si %xmm3, %rax ; CHECK-NEXT: setbe %cl @@ -4565,10 +4815,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { ; AVX1-NEXT: vcomiss %xmm2, %xmm0 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: ja .LBB116_2 +; AVX1-NEXT: ja .LBB122_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm3 -; AVX1-NEXT: .LBB116_2: # %entry +; AVX1-NEXT: .LBB122_2: # %entry ; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttss2si %xmm2, %rax ; AVX1-NEXT: setbe %cl @@ -4578,10 +4828,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { ; AVX1-NEXT: vmovq %rcx, %xmm2 ; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm3, %xmm0 -; AVX1-NEXT: ja .LBB116_4 +; AVX1-NEXT: ja .LBB122_4 ; AVX1-NEXT: # %bb.3: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: .LBB116_4: # %entry +; AVX1-NEXT: .LBB122_4: # %entry ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: 
vcvttss2si %xmm0, %rax ; AVX1-NEXT: setbe %cl @@ -4622,10 +4872,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; CHECK-NEXT: comiss %xmm2, %xmm1 ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: xorps %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB117_2 +; CHECK-NEXT: ja .LBB123_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movaps %xmm1, %xmm3 -; CHECK-NEXT: .LBB117_2: # %entry +; CHECK-NEXT: .LBB123_2: # %entry ; CHECK-NEXT: subss %xmm3, %xmm2 ; CHECK-NEXT: cvttss2si %xmm2, %rcx ; CHECK-NEXT: setbe %al @@ -4635,10 +4885,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm2, %xmm1 ; CHECK-NEXT: xorps %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB117_4 +; CHECK-NEXT: ja .LBB123_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: movaps %xmm1, %xmm3 -; CHECK-NEXT: .LBB117_4: # %entry +; CHECK-NEXT: .LBB123_4: # %entry ; CHECK-NEXT: subss %xmm3, %xmm2 ; CHECK-NEXT: cvttss2si %xmm2, %rcx ; CHECK-NEXT: setbe %dl @@ -4647,10 +4897,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; CHECK-NEXT: xorq %rcx, %rdx ; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm2, %xmm1 -; CHECK-NEXT: ja .LBB117_6 +; CHECK-NEXT: ja .LBB123_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: .LBB117_6: # %entry +; CHECK-NEXT: .LBB123_6: # %entry ; CHECK-NEXT: subss %xmm0, %xmm2 ; CHECK-NEXT: cvttss2si %xmm2, %rsi ; CHECK-NEXT: setbe %cl @@ -4666,10 +4916,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; AVX1-NEXT: vcomiss %xmm2, %xmm0 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: ja .LBB117_2 +; AVX1-NEXT: ja .LBB123_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm3 -; AVX1-NEXT: .LBB117_2: # %entry +; AVX1-NEXT: .LBB123_2: # %entry ; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttss2si %xmm2, %rax ; 
AVX1-NEXT: setbe %cl @@ -4680,10 +4930,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm3, %xmm0 ; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: ja .LBB117_4 +; AVX1-NEXT: ja .LBB123_4 ; AVX1-NEXT: # %bb.3: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm4 -; AVX1-NEXT: .LBB117_4: # %entry +; AVX1-NEXT: .LBB123_4: # %entry ; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax ; AVX1-NEXT: setbe %cl @@ -4694,10 +4944,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] ; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm3, %xmm0 -; AVX1-NEXT: ja .LBB117_6 +; AVX1-NEXT: ja .LBB123_6 ; AVX1-NEXT: # %bb.5: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: .LBB117_6: # %entry +; AVX1-NEXT: .LBB123_6: # %entry ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttss2si %xmm0, %rax ; AVX1-NEXT: setbe %cl @@ -4735,10 +4985,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; CHECK-NEXT: comiss %xmm0, %xmm2 ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: xorps %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB118_2 +; CHECK-NEXT: ja .LBB124_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movaps %xmm2, %xmm3 -; CHECK-NEXT: .LBB118_2: # %entry +; CHECK-NEXT: .LBB124_2: # %entry ; CHECK-NEXT: subss %xmm3, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %rcx ; CHECK-NEXT: setbe %al @@ -4748,10 +4998,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm0, %xmm2 ; CHECK-NEXT: xorps %xmm4, %xmm4 -; CHECK-NEXT: ja .LBB118_4 +; CHECK-NEXT: ja .LBB124_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: movaps %xmm2, %xmm4 -; CHECK-NEXT: .LBB118_4: # %entry +; CHECK-NEXT: .LBB124_4: # %entry ; CHECK-NEXT: movq %rax, %xmm3 ; CHECK-NEXT: subss 
%xmm4, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %rax @@ -4763,10 +5013,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; CHECK-NEXT: movss {{.*#+}} xmm4 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm4, %xmm2 ; CHECK-NEXT: xorps %xmm5, %xmm5 -; CHECK-NEXT: ja .LBB118_6 +; CHECK-NEXT: ja .LBB124_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: movaps %xmm2, %xmm5 -; CHECK-NEXT: .LBB118_6: # %entry +; CHECK-NEXT: .LBB124_6: # %entry ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; CHECK-NEXT: subss %xmm5, %xmm4 ; CHECK-NEXT: cvttss2si %xmm4, %rax @@ -4777,10 +5027,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; CHECK-NEXT: movq %rcx, %xmm3 ; CHECK-NEXT: movss {{.*#+}} xmm4 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: comiss %xmm4, %xmm2 -; CHECK-NEXT: ja .LBB118_8 +; CHECK-NEXT: ja .LBB124_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: movaps %xmm2, %xmm1 -; CHECK-NEXT: .LBB118_8: # %entry +; CHECK-NEXT: .LBB124_8: # %entry ; CHECK-NEXT: subss %xmm1, %xmm4 ; CHECK-NEXT: cvttss2si %xmm4, %rax ; CHECK-NEXT: setbe %cl @@ -4798,10 +5048,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; AVX1-NEXT: vcomiss %xmm2, %xmm0 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: ja .LBB118_2 +; AVX1-NEXT: ja .LBB124_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm3 -; AVX1-NEXT: .LBB118_2: # %entry +; AVX1-NEXT: .LBB124_2: # %entry ; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttss2si %xmm2, %rcx ; AVX1-NEXT: setbe %al @@ -4811,10 +5061,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm3, %xmm0 ; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: ja .LBB118_4 +; AVX1-NEXT: ja .LBB124_4 ; AVX1-NEXT: # %bb.3: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm4 -; AVX1-NEXT: .LBB118_4: # %entry +; AVX1-NEXT: .LBB124_4: # %entry ; 
AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax @@ -4826,10 +5076,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; AVX1-NEXT: vmovss {{.*#+}} xmm4 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm4, %xmm0 ; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: ja .LBB118_6 +; AVX1-NEXT: ja .LBB124_6 ; AVX1-NEXT: # %bb.5: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm5 -; AVX1-NEXT: .LBB118_6: # %entry +; AVX1-NEXT: .LBB124_6: # %entry ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] ; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax @@ -4840,10 +5090,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vmovss {{.*#+}} xmm4 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] ; AVX1-NEXT: vcomiss %xmm4, %xmm0 -; AVX1-NEXT: ja .LBB118_8 +; AVX1-NEXT: ja .LBB124_8 ; AVX1-NEXT: # %bb.7: # %entry ; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: .LBB118_8: # %entry +; AVX1-NEXT: .LBB124_8: # %entry ; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0 ; AVX1-NEXT: vcvttss2si %xmm0, %rax ; AVX1-NEXT: setbe %cl @@ -5036,10 +5286,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 { ; CHECK-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0] ; CHECK-NEXT: comisd %xmm0, %xmm2 ; CHECK-NEXT: xorpd %xmm1, %xmm1 -; CHECK-NEXT: ja .LBB123_2 +; CHECK-NEXT: ja .LBB129_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movapd %xmm2, %xmm1 -; CHECK-NEXT: .LBB123_2: # %entry +; CHECK-NEXT: .LBB129_2: # %entry ; CHECK-NEXT: subsd %xmm1, %xmm0 ; CHECK-NEXT: cvttsd2si %xmm0, %rcx ; CHECK-NEXT: setbe %al @@ -5054,10 +5304,10 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 { ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] ; AVX1-NEXT: vcomisd %xmm0, %xmm1 ; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: ja .LBB123_2 +; AVX1-NEXT: ja .LBB129_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovapd 
%xmm1, %xmm2 -; AVX1-NEXT: .LBB123_2: # %entry +; AVX1-NEXT: .LBB129_2: # %entry ; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx ; AVX1-NEXT: setbe %al @@ -5085,10 +5335,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { ; CHECK-NEXT: comisd %xmm2, %xmm1 ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: xorpd %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB124_2 +; CHECK-NEXT: ja .LBB130_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movapd %xmm1, %xmm3 -; CHECK-NEXT: .LBB124_2: # %entry +; CHECK-NEXT: .LBB130_2: # %entry ; CHECK-NEXT: subsd %xmm3, %xmm2 ; CHECK-NEXT: cvttsd2si %xmm2, %rax ; CHECK-NEXT: setbe %cl @@ -5098,10 +5348,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { ; CHECK-NEXT: movq %rcx, %xmm2 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] ; CHECK-NEXT: comisd %xmm3, %xmm1 -; CHECK-NEXT: ja .LBB124_4 +; CHECK-NEXT: ja .LBB130_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: .LBB124_4: # %entry +; CHECK-NEXT: .LBB130_4: # %entry ; CHECK-NEXT: subsd %xmm0, %xmm3 ; CHECK-NEXT: cvttsd2si %xmm3, %rax ; CHECK-NEXT: setbe %cl @@ -5119,10 +5369,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { ; AVX1-NEXT: vcomisd %xmm2, %xmm0 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: ja .LBB124_2 +; AVX1-NEXT: ja .LBB130_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm3 -; AVX1-NEXT: .LBB124_2: # %entry +; AVX1-NEXT: .LBB130_2: # %entry ; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttsd2si %xmm2, %rax ; AVX1-NEXT: setbe %cl @@ -5132,10 +5382,10 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { ; AVX1-NEXT: vmovq %rcx, %xmm2 ; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] ; AVX1-NEXT: vcomisd %xmm3, %xmm0 -; AVX1-NEXT: ja .LBB124_4 +; AVX1-NEXT: ja .LBB130_4 ; AVX1-NEXT: # %bb.3: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm1 -; AVX1-NEXT: 
.LBB124_4: # %entry +; AVX1-NEXT: .LBB130_4: # %entry ; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax ; AVX1-NEXT: setbe %cl @@ -5177,10 +5427,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; CHECK-NEXT: comisd %xmm2, %xmm1 ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: xorpd %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB125_2 +; CHECK-NEXT: ja .LBB131_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movapd %xmm1, %xmm3 -; CHECK-NEXT: .LBB125_2: # %entry +; CHECK-NEXT: .LBB131_2: # %entry ; CHECK-NEXT: subsd %xmm3, %xmm2 ; CHECK-NEXT: cvttsd2si %xmm2, %rcx ; CHECK-NEXT: setbe %al @@ -5190,10 +5440,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] ; CHECK-NEXT: comisd %xmm2, %xmm1 ; CHECK-NEXT: xorpd %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB125_4 +; CHECK-NEXT: ja .LBB131_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: movapd %xmm1, %xmm3 -; CHECK-NEXT: .LBB125_4: # %entry +; CHECK-NEXT: .LBB131_4: # %entry ; CHECK-NEXT: subsd %xmm3, %xmm2 ; CHECK-NEXT: cvttsd2si %xmm2, %rcx ; CHECK-NEXT: setbe %dl @@ -5202,10 +5452,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; CHECK-NEXT: xorq %rcx, %rdx ; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0] ; CHECK-NEXT: comisd %xmm2, %xmm1 -; CHECK-NEXT: ja .LBB125_6 +; CHECK-NEXT: ja .LBB131_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: .LBB125_6: # %entry +; CHECK-NEXT: .LBB131_6: # %entry ; CHECK-NEXT: subsd %xmm0, %xmm2 ; CHECK-NEXT: cvttsd2si %xmm2, %rsi ; CHECK-NEXT: setbe %cl @@ -5221,10 +5471,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; AVX1-NEXT: vcomisd %xmm2, %xmm0 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: ja .LBB125_2 +; AVX1-NEXT: ja .LBB131_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm3 -; AVX1-NEXT: .LBB125_2: # %entry +; 
AVX1-NEXT: .LBB131_2: # %entry ; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttsd2si %xmm2, %rax ; AVX1-NEXT: setbe %cl @@ -5235,10 +5485,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] ; AVX1-NEXT: vcomisd %xmm3, %xmm0 ; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: ja .LBB125_4 +; AVX1-NEXT: ja .LBB131_4 ; AVX1-NEXT: # %bb.3: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm4 -; AVX1-NEXT: .LBB125_4: # %entry +; AVX1-NEXT: .LBB131_4: # %entry ; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax ; AVX1-NEXT: setbe %cl @@ -5249,10 +5499,10 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] ; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0] ; AVX1-NEXT: vcomisd %xmm3, %xmm0 -; AVX1-NEXT: ja .LBB125_6 +; AVX1-NEXT: ja .LBB131_6 ; AVX1-NEXT: # %bb.5: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm1 -; AVX1-NEXT: .LBB125_6: # %entry +; AVX1-NEXT: .LBB131_6: # %entry ; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax ; AVX1-NEXT: setbe %cl @@ -5290,10 +5540,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; CHECK-NEXT: comisd %xmm0, %xmm2 ; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: xorpd %xmm3, %xmm3 -; CHECK-NEXT: ja .LBB126_2 +; CHECK-NEXT: ja .LBB132_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movapd %xmm2, %xmm3 -; CHECK-NEXT: .LBB126_2: # %entry +; CHECK-NEXT: .LBB132_2: # %entry ; CHECK-NEXT: subsd %xmm3, %xmm0 ; CHECK-NEXT: cvttsd2si %xmm0, %rcx ; CHECK-NEXT: setbe %al @@ -5303,10 +5553,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] ; CHECK-NEXT: comisd %xmm0, %xmm2 ; CHECK-NEXT: xorpd %xmm4, %xmm4 -; CHECK-NEXT: ja .LBB126_4 +; CHECK-NEXT: ja .LBB132_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: movapd %xmm2, %xmm4 -; 
CHECK-NEXT: .LBB126_4: # %entry +; CHECK-NEXT: .LBB132_4: # %entry ; CHECK-NEXT: movq %rax, %xmm3 ; CHECK-NEXT: subsd %xmm4, %xmm0 ; CHECK-NEXT: cvttsd2si %xmm0, %rax @@ -5318,10 +5568,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; CHECK-NEXT: movsd {{.*#+}} xmm4 = [4.2399999999999999E+1,0.0E+0] ; CHECK-NEXT: comisd %xmm4, %xmm2 ; CHECK-NEXT: xorpd %xmm5, %xmm5 -; CHECK-NEXT: ja .LBB126_6 +; CHECK-NEXT: ja .LBB132_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: movapd %xmm2, %xmm5 -; CHECK-NEXT: .LBB126_6: # %entry +; CHECK-NEXT: .LBB132_6: # %entry ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; CHECK-NEXT: subsd %xmm5, %xmm4 ; CHECK-NEXT: cvttsd2si %xmm4, %rax @@ -5332,10 +5582,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; CHECK-NEXT: movq %rcx, %xmm3 ; CHECK-NEXT: movsd {{.*#+}} xmm4 = [4.2299999999999997E+1,0.0E+0] ; CHECK-NEXT: comisd %xmm4, %xmm2 -; CHECK-NEXT: ja .LBB126_8 +; CHECK-NEXT: ja .LBB132_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: movapd %xmm2, %xmm1 -; CHECK-NEXT: .LBB126_8: # %entry +; CHECK-NEXT: .LBB132_8: # %entry ; CHECK-NEXT: subsd %xmm1, %xmm4 ; CHECK-NEXT: cvttsd2si %xmm4, %rax ; CHECK-NEXT: setbe %cl @@ -5353,10 +5603,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; AVX1-NEXT: vcomisd %xmm2, %xmm0 ; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: ja .LBB126_2 +; AVX1-NEXT: ja .LBB132_2 ; AVX1-NEXT: # %bb.1: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm3 -; AVX1-NEXT: .LBB126_2: # %entry +; AVX1-NEXT: .LBB132_2: # %entry ; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttsd2si %xmm2, %rcx ; AVX1-NEXT: setbe %al @@ -5366,10 +5616,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0] ; AVX1-NEXT: vcomisd %xmm3, %xmm0 ; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: ja .LBB126_4 +; AVX1-NEXT: ja .LBB132_4 ; AVX1-NEXT: # 
%bb.3: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm4 -; AVX1-NEXT: .LBB126_4: # %entry +; AVX1-NEXT: .LBB132_4: # %entry ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax @@ -5381,10 +5631,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = [4.2200000000000003E+1,0.0E+0] ; AVX1-NEXT: vcomisd %xmm4, %xmm0 ; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: ja .LBB126_6 +; AVX1-NEXT: ja .LBB132_6 ; AVX1-NEXT: # %bb.5: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm5 -; AVX1-NEXT: .LBB126_6: # %entry +; AVX1-NEXT: .LBB132_6: # %entry ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] ; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax @@ -5395,10 +5645,10 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = [4.2100000000000001E+1,0.0E+0] ; AVX1-NEXT: vcomisd %xmm4, %xmm0 -; AVX1-NEXT: ja .LBB126_8 +; AVX1-NEXT: ja .LBB132_8 ; AVX1-NEXT: # %bb.7: # %entry ; AVX1-NEXT: vmovapd %xmm0, %xmm1 -; AVX1-NEXT: .LBB126_8: # %entry +; AVX1-NEXT: .LBB132_8: # %entry ; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax ; AVX1-NEXT: setbe %cl @@ -5620,108 +5870,121 @@ entry: ret <4 x double> %result } -define <1 x float> @constrained_vector_ceil_v1f32() #0 { -; CHECK-LABEL: constrained_vector_ceil_v1f32: +define <1 x float> @constrained_vector_ceil_v1f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_ceil_v1f32_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: callq ceilf@PLT ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_ceil_v1f32: +; AVX-LABEL: constrained_vector_ceil_v1f32_var: ; AVX: # %bb.0: # 
%entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq entry: + %b = load <1 x float>, ptr %a %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( - <1 x float> , + <1 x float> %b, metadata !"fpexcept.strict") #0 ret <1 x float> %ceil } -define <2 x double> @constrained_vector_ceil_v2f64() #0 { -; CHECK-LABEL: constrained_vector_ceil_v2f64: +define <2 x double> @constrained_vector_ceil_v2f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_ceil_v2f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] -; CHECK-NEXT: callq ceil@PLT +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movaps (%rdi), %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] ; CHECK-NEXT: callq ceil@PLT -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq ceil@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_ceil_v2f64: +; AVX-LABEL: constrained_vector_ceil_v2f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX-NEXT: vroundpd $10, (%rdi), %xmm0 ; AVX-NEXT: retq entry: + %b = load <2 x double>, ptr %a %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( - <2 
x double> , + <2 x double> %b, metadata !"fpexcept.strict") #0 ret <2 x double> %ceil } -define <3 x float> @constrained_vector_ceil_v3f32() #0 { -; CHECK-LABEL: constrained_vector_ceil_v3f32: +define <3 x float> @constrained_vector_ceil_v3f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_ceil_v3f32_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq ceilf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: callq ceilf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq ceilf@PLT -; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_ceil_v3f32: +; AVX-LABEL: constrained_vector_ceil_v3f32_var: ; 
AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $10, %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vroundss $10, %xmm2, %xmm2, %xmm2 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX-NEXT: retq entry: + %b = load <3 x float>, ptr %a %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( - <3 x float> , + <3 x float> %b, metadata !"fpexcept.strict") #0 ret <3 x float> %ceil } -define <3 x double> @constrained_vector_ceil_v3f64() #0 { -; CHECK-LABEL: constrained_vector_ceil_v3f64: +define <3 x double> @constrained_vector_ceil_v3f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_ceil_v3f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq ceil@PLT ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq ceil@PLT ; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte 
Reload +; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: callq ceil@PLT ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) @@ -5730,127 +5993,141 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 { ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_ceil_v3f64: +; AVX-LABEL: constrained_vector_ceil_v3f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vroundpd $10, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; AVX-NEXT: vroundpd $10, (%rdi), %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: + %b = load <3 x double>, ptr %a %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64( - <3 x double> , + <3 x double> %b, metadata !"fpexcept.strict") #0 ret <3 x double> %ceil } -define <1 x float> @constrained_vector_floor_v1f32() #0 { -; CHECK-LABEL: constrained_vector_floor_v1f32: +define <1 x float> @constrained_vector_floor_v1f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_floor_v1f32_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: callq floorf@PLT ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_floor_v1f32: +; AVX-LABEL: constrained_vector_floor_v1f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq entry: + 
%b = load <1 x float>, ptr %a %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( - <1 x float> , + <1 x float> %b, metadata !"fpexcept.strict") #0 ret <1 x float> %floor } -define <2 x double> @constrained_vector_floor_v2f64() #0 { -; CHECK-LABEL: constrained_vector_floor_v2f64: +define <2 x double> @constrained_vector_floor_v2f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_floor_v2f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] -; CHECK-NEXT: callq floor@PLT +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movaps (%rdi), %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] ; CHECK-NEXT: callq floor@PLT -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq floor@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_floor_v2f64: +; AVX-LABEL: constrained_vector_floor_v2f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX-NEXT: vroundpd $9, (%rdi), %xmm0 ; AVX-NEXT: retq entry: + %b = load <2 x double>, ptr %a %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64( - <2 x double> , + <2 x double> %b, metadata !"fpexcept.strict") #0 ret <2 x double> %floor } -define <3 x float> @constrained_vector_floor_v3f32() #0 { -; CHECK-LABEL: 
constrained_vector_floor_v3f32: +define <3 x float> @constrained_vector_floor_v3f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_floor_v3f32_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq floorf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: callq floorf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq floorf@PLT -; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_floor_v3f32: +; AVX-LABEL: constrained_vector_floor_v3f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; 
AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $9, %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vroundss $9, %xmm2, %xmm2, %xmm2 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX-NEXT: retq entry: + %b = load <3 x float>, ptr %a %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( - <3 x float> , + <3 x float> %b, metadata !"fpexcept.strict") #0 ret <3 x float> %floor } -define <3 x double> @constrained_vector_floor_v3f64() #0 { -; CHECK-LABEL: constrained_vector_floor_v3f64: +define <3 x double> @constrained_vector_floor_v3f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_floor_v3f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq floor@PLT ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq floor@PLT ; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: callq floor@PLT ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl 
{{[0-9]+}}(%rsp) @@ -5859,149 +6136,175 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 { ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_floor_v3f64: +; AVX-LABEL: constrained_vector_floor_v3f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vroundpd $9, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; AVX-NEXT: vroundpd $9, (%rdi), %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: + %b = load <3 x double>, ptr %a %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64( - <3 x double> , + <3 x double> %b, metadata !"fpexcept.strict") #0 ret <3 x double> %floor } -define <1 x float> @constrained_vector_round_v1f32() #0 { -; CHECK-LABEL: constrained_vector_round_v1f32: +define <1 x float> @constrained_vector_round_v1f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_round_v1f32_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: callq roundf@PLT ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_round_v1f32: +; AVX-LABEL: constrained_vector_round_v1f32_var: ; AVX: # %bb.0: # %entry ; AVX-NEXT: pushq %rax ; AVX-NEXT: .cfi_def_cfa_offset 16 -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: callq roundf@PLT ; AVX-NEXT: popq %rax ; AVX-NEXT: .cfi_def_cfa_offset 8 ; AVX-NEXT: retq entry: + %b = load <1 x float>, ptr %a 
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32( - <1 x float> , + <1 x float> %b, metadata !"fpexcept.strict") #0 ret <1 x float> %round } -define <2 x double> @constrained_vector_round_v2f64() #0 { -; CHECK-LABEL: constrained_vector_round_v2f64: +define <2 x double> @constrained_vector_round_v2f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_round_v2f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] -; CHECK-NEXT: callq round@PLT +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movaps (%rdi), %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] ; CHECK-NEXT: callq round@PLT -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq round@PLT +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_round_v2f64: +; AVX-LABEL: constrained_vector_round_v2f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: subq $24, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 32 -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: callq round@PLT -; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill -; AVX-NEXT: vmovsd 
{{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; AVX-NEXT: # xmm0 = mem[0],zero ; AVX-NEXT: callq round@PLT -; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; AVX-NEXT: # xmm0 = xmm0[0],mem[0] -; AVX-NEXT: addq $24, %rsp +; AVX-NEXT: addq $40, %rsp ; AVX-NEXT: .cfi_def_cfa_offset 8 ; AVX-NEXT: retq entry: + %b = load <2 x double>, ptr %a %round = call <2 x double> @llvm.experimental.constrained.round.v2f64( - <2 x double> , + <2 x double> %b, metadata !"fpexcept.strict") #0 ret <2 x double> %round } -define <3 x float> @constrained_vector_round_v3f32() #0 { -; CHECK-LABEL: constrained_vector_round_v3f32: +define <3 x float> @constrained_vector_round_v3f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_round_v3f32_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq roundf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: callq roundf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq roundf@PLT -; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: unpcklps 
{{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_round_v3f32: +; AVX-LABEL: constrained_vector_round_v3f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: subq $40, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 48 -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: subq $48, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 64 +; AVX-NEXT: .cfi_offset %rbx, -16 +; AVX-NEXT: movq %rdi, %rbx +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: callq roundf@PLT ; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; AVX-NEXT: callq roundf@PLT -; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: callq roundf@PLT -; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] ; AVX-NEXT: vinsertps $32, 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] -; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: addq $48, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbx ; AVX-NEXT: .cfi_def_cfa_offset 8 ; AVX-NEXT: retq entry: + %b = load <3 x float>, ptr %a %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( - <3 x float> , + <3 x float> %b, metadata !"fpexcept.strict") #0 ret <3 x float> %round } -define <3 x double> @constrained_vector_round_v3f64() #0 { -; CHECK-LABEL: constrained_vector_round_v3f64: +define <3 x double> @constrained_vector_round_v3f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_round_v3f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq round@PLT ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq round@PLT ; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: callq round@PLT ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) @@ -6010,139 +6313,162 @@ define <3 x double> @constrained_vector_round_v3f64() #0 { ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero -; CHECK-NEXT: addq 
$24, %rsp +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_round_v3f64: +; AVX-LABEL: constrained_vector_round_v3f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: subq $40, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 48 -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: subq $48, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 64 +; AVX-NEXT: .cfi_offset %rbx, -16 +; AVX-NEXT: movq %rdi, %rbx +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: callq round@PLT ; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] +; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; AVX-NEXT: # xmm0 = mem[0],zero ; AVX-NEXT: callq round@PLT ; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload ; AVX-NEXT: # xmm0 = xmm0[0],mem[0] -; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; AVX-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vzeroupper ; AVX-NEXT: callq round@PLT -; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; AVX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: addq $48, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbx ; AVX-NEXT: .cfi_def_cfa_offset 8 ; AVX-NEXT: retq entry: + %b = load <3 x double>, ptr %a %round = call <3 x double> @llvm.experimental.constrained.round.v3f64( - <3 x double> , + <3 x double> %b, metadata !"fpexcept.strict") #0 ret <3 x double> %round } -define <1 x float> @constrained_vector_trunc_v1f32() #0 { -; CHECK-LABEL: constrained_vector_trunc_v1f32: 
+define <1 x float> @constrained_vector_trunc_v1f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_trunc_v1f32_var: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: callq truncf@PLT ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_trunc_v1f32: +; AVX-LABEL: constrained_vector_trunc_v1f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq entry: + %b = load <1 x float>, ptr %a %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32( - <1 x float> , + <1 x float> %b, metadata !"fpexcept.strict") #0 ret <1 x float> %trunc } -define <2 x double> @constrained_vector_trunc_v2f64() #0 { -; CHECK-LABEL: constrained_vector_trunc_v2f64: +define <2 x double> @constrained_vector_trunc_v2f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_trunc_v2f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] -; CHECK-NEXT: callq trunc@PLT +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movaps (%rdi), %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] ; CHECK-NEXT: callq trunc@PLT -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq trunc@PLT +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_trunc_v2f64: +; AVX-LABEL: constrained_vector_trunc_v2f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX-NEXT: vroundpd $11, (%rdi), %xmm0 ; AVX-NEXT: retq entry: + %b = load <2 x double>, ptr %a %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( - <2 x double> , + <2 x double> %b, metadata !"fpexcept.strict") #0 ret <2 x double> %trunc } -define <3 x float> @constrained_vector_trunc_v3f32() #0 { -; CHECK-LABEL: constrained_vector_trunc_v3f32: +define <3 x float> @constrained_vector_trunc_v3f32_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_trunc_v3f32_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq truncf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: callq truncf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq truncf@PLT -; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_trunc_v3f32: +; AVX-LABEL: constrained_vector_trunc_v3f32_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.5E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; AVX-NEXT: vroundss $11, %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovss {{.*#+}} xmm2 = [2.5E+0,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vroundss $11, %xmm2, %xmm2, %xmm2 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX-NEXT: retq entry: + %b = load <3 x float>, ptr %a %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( - <3 x float> , + <3 x float> %b, metadata !"fpexcept.strict") #0 ret <3 x float> %trunc } -define <3 x double> @constrained_vector_trunc_v3f64() #0 { -; CHECK-LABEL: constrained_vector_trunc_v3f64: +define <3 x double> @constrained_vector_trunc_v3f64_var(ptr %a) #0 { +; CHECK-LABEL: constrained_vector_trunc_v3f64_var: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.8999999999999999E+0,0.0E+0] +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; 
CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: callq trunc@PLT ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.1000000000000001E+0,0.0E+0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq trunc@PLT ; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: callq trunc@PLT ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) @@ -6151,20 +6477,21 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 { ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-NEXT: # xmm1 = mem[0],zero -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_trunc_v3f64: +; AVX-LABEL: constrained_vector_trunc_v3f64_var: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.5E+0,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vroundpd $11, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; AVX-NEXT: vroundpd $11, (%rdi), %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: + %b = load <3 x double>, ptr %a %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64( - <3 x double> , + <3 x double> %b, metadata !"fpexcept.strict") #0 ret <3 x double> %trunc } @@ -6757,10 +7084,10 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 { ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: cmovnsq %rdi, %rcx ; CHECK-NEXT: cvtsi2sd %rcx, %xmm0 -; CHECK-NEXT: jns .LBB169_2 +; CHECK-NEXT: jns 
.LBB175_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addsd %xmm0, %xmm0 -; CHECK-NEXT: .LBB169_2: # %entry +; CHECK-NEXT: .LBB175_2: # %entry ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64: @@ -6773,10 +7100,10 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 { ; AVX1-NEXT: testq %rdi, %rdi ; AVX1-NEXT: cmovnsq %rdi, %rcx ; AVX1-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0 -; AVX1-NEXT: jns .LBB169_2 +; AVX1-NEXT: jns .LBB175_2 ; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: .LBB169_2: # %entry +; AVX1-NEXT: .LBB175_2: # %entry ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64: @@ -6802,10 +7129,10 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 { ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: cmovnsq %rdi, %rcx ; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 -; CHECK-NEXT: jns .LBB170_2 +; CHECK-NEXT: jns .LBB176_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addss %xmm0, %xmm0 -; CHECK-NEXT: .LBB170_2: # %entry +; CHECK-NEXT: .LBB176_2: # %entry ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64: @@ -6818,10 +7145,10 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 { ; AVX1-NEXT: testq %rdi, %rdi ; AVX1-NEXT: cmovnsq %rdi, %rcx ; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0 -; AVX1-NEXT: jns .LBB170_2 +; AVX1-NEXT: jns .LBB176_2 ; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: .LBB170_2: # %entry +; AVX1-NEXT: .LBB176_2: # %entry ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64: @@ -6920,10 +7247,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2sd %rdx, %xmm0 -; CHECK-NEXT: jns .LBB173_2 +; CHECK-NEXT: jns .LBB179_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addsd %xmm0, %xmm0 -; CHECK-NEXT: .LBB173_2: # %entry +; CHECK-NEXT: .LBB179_2: # %entry ; 
CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; CHECK-NEXT: movq %xmm1, %rax ; CHECK-NEXT: movq %rax, %rcx @@ -6935,10 +7262,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2sd %rdx, %xmm1 -; CHECK-NEXT: jns .LBB173_4 +; CHECK-NEXT: jns .LBB179_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addsd %xmm1, %xmm1 -; CHECK-NEXT: .LBB173_4: # %entry +; CHECK-NEXT: .LBB179_4: # %entry ; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; @@ -6953,10 +7280,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1 -; AVX1-NEXT: jns .LBB173_2 +; AVX1-NEXT: jns .LBB179_2 ; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: .LBB173_2: # %entry +; AVX1-NEXT: .LBB179_2: # %entry ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx @@ -6966,10 +7293,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0 -; AVX1-NEXT: jns .LBB173_4 +; AVX1-NEXT: jns .LBB179_4 ; AVX1-NEXT: # %bb.3: ; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: .LBB173_4: # %entry +; AVX1-NEXT: .LBB179_4: # %entry ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: retq ; @@ -7011,10 +7338,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 -; CHECK-NEXT: jns .LBB174_2 +; CHECK-NEXT: jns .LBB180_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addss %xmm0, %xmm0 -; CHECK-NEXT: .LBB174_2: # %entry +; CHECK-NEXT: .LBB180_2: # %entry ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; CHECK-NEXT: movq %xmm1, %rax ; CHECK-NEXT: 
movq %rax, %rcx @@ -7026,10 +7353,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 -; CHECK-NEXT: jns .LBB174_4 +; CHECK-NEXT: jns .LBB180_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: .LBB174_4: # %entry +; CHECK-NEXT: .LBB180_4: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq ; @@ -7177,10 +7504,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: cmovnsq %rdi, %rcx ; CHECK-NEXT: cvtsi2sd %rcx, %xmm0 -; CHECK-NEXT: jns .LBB177_2 +; CHECK-NEXT: jns .LBB183_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addsd %xmm0, %xmm0 -; CHECK-NEXT: .LBB177_2: # %entry +; CHECK-NEXT: .LBB183_2: # %entry ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: shrq %rax ; CHECK-NEXT: movl %esi, %ecx @@ -7189,10 +7516,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovnsq %rsi, %rcx ; CHECK-NEXT: cvtsi2sd %rcx, %xmm1 -; CHECK-NEXT: jns .LBB177_4 +; CHECK-NEXT: jns .LBB183_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addsd %xmm1, %xmm1 -; CHECK-NEXT: .LBB177_4: # %entry +; CHECK-NEXT: .LBB183_4: # %entry ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: shrq %rax ; CHECK-NEXT: movl %edx, %ecx @@ -7201,10 +7528,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovnsq %rdx, %rcx ; CHECK-NEXT: cvtsi2sd %rcx, %xmm2 -; CHECK-NEXT: jns .LBB177_6 +; CHECK-NEXT: jns .LBB183_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: addsd %xmm2, %xmm2 -; CHECK-NEXT: .LBB177_6: # %entry +; CHECK-NEXT: .LBB183_6: # %entry ; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: wait @@ -7221,10 +7548,10 @@ define <3 x double> 
@constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1 -; AVX1-NEXT: jns .LBB177_2 +; AVX1-NEXT: jns .LBB183_2 ; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: .LBB177_2: # %entry +; AVX1-NEXT: .LBB183_2: # %entry ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx @@ -7234,10 +7561,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm2 -; AVX1-NEXT: jns .LBB177_4 +; AVX1-NEXT: jns .LBB183_4 ; AVX1-NEXT: # %bb.3: ; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB177_4: # %entry +; AVX1-NEXT: .LBB183_4: # %entry ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax @@ -7249,10 +7576,10 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2sd %rdx, %xmm3, %xmm0 -; AVX1-NEXT: jns .LBB177_6 +; AVX1-NEXT: jns .LBB183_6 ; AVX1-NEXT: # %bb.5: ; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: .LBB177_6: # %entry +; AVX1-NEXT: .LBB183_6: # %entry ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; @@ -7287,10 +7614,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovnsq %rsi, %rcx ; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 -; CHECK-NEXT: jns .LBB178_2 +; CHECK-NEXT: jns .LBB184_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: .LBB178_2: # %entry +; CHECK-NEXT: .LBB184_2: # %entry ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: shrq %rax ; CHECK-NEXT: movl %edi, %ecx @@ -7299,10 +7626,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: 
testq %rdi, %rdi ; CHECK-NEXT: cmovnsq %rdi, %rcx ; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 -; CHECK-NEXT: jns .LBB178_4 +; CHECK-NEXT: jns .LBB184_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addss %xmm0, %xmm0 -; CHECK-NEXT: .LBB178_4: # %entry +; CHECK-NEXT: .LBB184_4: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: shrq %rax @@ -7313,10 +7640,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rdx, %rcx ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 -; CHECK-NEXT: jns .LBB178_6 +; CHECK-NEXT: jns .LBB184_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: .LBB178_6: # %entry +; CHECK-NEXT: .LBB184_6: # %entry ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; @@ -7331,10 +7658,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1 -; AVX1-NEXT: jns .LBB178_2 +; AVX1-NEXT: jns .LBB184_2 ; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: .LBB178_2: # %entry +; AVX1-NEXT: .LBB184_2: # %entry ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx @@ -7344,10 +7671,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2 -; AVX1-NEXT: jns .LBB178_4 +; AVX1-NEXT: jns .LBB184_4 ; AVX1-NEXT: # %bb.3: ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB178_4: # %entry +; AVX1-NEXT: .LBB184_4: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax @@ -7359,10 +7686,10 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; AVX1-NEXT: testq %rax, %rax ; 
AVX1-NEXT: cmovnsq %rax, %rdx ; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0 -; AVX1-NEXT: jns .LBB178_6 +; AVX1-NEXT: jns .LBB184_6 ; AVX1-NEXT: # %bb.5: ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: .LBB178_6: # %entry +; AVX1-NEXT: .LBB184_6: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -7477,10 +7804,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2sd %rdx, %xmm0 -; CHECK-NEXT: jns .LBB181_2 +; CHECK-NEXT: jns .LBB187_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addsd %xmm0, %xmm0 -; CHECK-NEXT: .LBB181_2: # %entry +; CHECK-NEXT: .LBB187_2: # %entry ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] ; CHECK-NEXT: movq %xmm2, %rax ; CHECK-NEXT: movq %rax, %rcx @@ -7491,10 +7818,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: cvtsi2sd %rdx, %xmm3 -; CHECK-NEXT: jns .LBB181_4 +; CHECK-NEXT: jns .LBB187_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addsd %xmm3, %xmm3 -; CHECK-NEXT: .LBB181_4: # %entry +; CHECK-NEXT: .LBB187_4: # %entry ; CHECK-NEXT: movq %xmm1, %rax ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: shrq %rcx @@ -7505,10 +7832,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm2, %xmm2 ; CHECK-NEXT: cvtsi2sd %rdx, %xmm2 -; CHECK-NEXT: jns .LBB181_6 +; CHECK-NEXT: jns .LBB187_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: addsd %xmm2, %xmm2 -; CHECK-NEXT: .LBB181_6: # %entry +; CHECK-NEXT: .LBB187_6: # %entry ; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; CHECK-NEXT: movq %xmm1, %rax @@ -7521,10 +7848,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; 
CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2sd %rdx, %xmm1 -; CHECK-NEXT: jns .LBB181_8 +; CHECK-NEXT: jns .LBB187_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: addsd %xmm1, %xmm1 -; CHECK-NEXT: .LBB181_8: # %entry +; CHECK-NEXT: .LBB187_8: # %entry ; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; CHECK-NEXT: movapd %xmm2, %xmm1 ; CHECK-NEXT: retq @@ -7601,10 +7928,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: cvtsi2ss %rdx, %xmm2 -; CHECK-NEXT: jns .LBB182_2 +; CHECK-NEXT: jns .LBB188_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addss %xmm2, %xmm2 -; CHECK-NEXT: .LBB182_2: # %entry +; CHECK-NEXT: .LBB188_2: # %entry ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; CHECK-NEXT: movq %xmm1, %rax ; CHECK-NEXT: movq %rax, %rcx @@ -7615,10 +7942,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: cvtsi2ss %rdx, %xmm3 -; CHECK-NEXT: jns .LBB182_4 +; CHECK-NEXT: jns .LBB188_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addss %xmm3, %xmm3 -; CHECK-NEXT: .LBB182_4: # %entry +; CHECK-NEXT: .LBB188_4: # %entry ; CHECK-NEXT: movq %xmm0, %rax ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: shrq %rcx @@ -7629,10 +7956,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 -; CHECK-NEXT: jns .LBB182_6 +; CHECK-NEXT: jns .LBB188_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: .LBB182_6: # %entry +; CHECK-NEXT: .LBB188_6: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; CHECK-NEXT: movq %xmm0, %rax @@ -7645,10 +7972,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; CHECK-NEXT: cmovnsq %rax, %rdx ; 
CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 -; CHECK-NEXT: jns .LBB182_8 +; CHECK-NEXT: jns .LBB188_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: addss %xmm0, %xmm0 -; CHECK-NEXT: .LBB182_8: # %entry +; CHECK-NEXT: .LBB188_8: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: movaps %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll index 00e43df15deea..b3d8d05f69947 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll @@ -4,14 +4,14 @@ ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2-FP ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2-FCP -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512-FCP -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512DQ-FCP -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512BW -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512BW-FCP -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s 
--check-prefixes=AVX512DQ-BW -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512DQ-BW-FCP +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-VL +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512-FCP +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512DQ-FCP +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW-FCP +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQ-BW +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512DQ-BW-FCP ; These patterns are produced by LoopVectorizer for interleaved loads. 
@@ -69,69 +69,6 @@ define void @load_i16_stride2_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou ; AVX512-NEXT: vmovd %xmm1, (%rsi) ; AVX512-NEXT: vmovd %xmm0, (%rdx) ; AVX512-NEXT: retq -; -; AVX512-FCP-LABEL: load_i16_stride2_vf2: -; AVX512-FCP: # %bb.0: -; AVX512-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512-FCP-NEXT: vmovd %xmm1, (%rsi) -; AVX512-FCP-NEXT: vmovd %xmm0, (%rdx) -; AVX512-FCP-NEXT: retq -; -; AVX512DQ-LABEL: load_i16_stride2_vf2: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512DQ-NEXT: vmovd %xmm1, (%rsi) -; AVX512DQ-NEXT: vmovd %xmm0, (%rdx) -; AVX512DQ-NEXT: retq -; -; AVX512DQ-FCP-LABEL: load_i16_stride2_vf2: -; AVX512DQ-FCP: # %bb.0: -; AVX512DQ-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512DQ-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512DQ-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512DQ-FCP-NEXT: vmovd %xmm1, (%rsi) -; AVX512DQ-FCP-NEXT: vmovd %xmm0, (%rdx) -; AVX512DQ-FCP-NEXT: retq -; -; AVX512BW-LABEL: load_i16_stride2_vf2: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512BW-NEXT: vmovd %xmm1, (%rsi) -; AVX512BW-NEXT: vmovd %xmm0, (%rdx) -; AVX512BW-NEXT: retq -; -; AVX512BW-FCP-LABEL: load_i16_stride2_vf2: -; AVX512BW-FCP: # %bb.0: -; AVX512BW-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512BW-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512BW-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512BW-FCP-NEXT: vmovd %xmm1, (%rsi) -; AVX512BW-FCP-NEXT: vmovd %xmm0, (%rdx) -; AVX512BW-FCP-NEXT: 
retq -; -; AVX512DQ-BW-LABEL: load_i16_stride2_vf2: -; AVX512DQ-BW: # %bb.0: -; AVX512DQ-BW-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512DQ-BW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512DQ-BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512DQ-BW-NEXT: vmovd %xmm1, (%rsi) -; AVX512DQ-BW-NEXT: vmovd %xmm0, (%rdx) -; AVX512DQ-BW-NEXT: retq -; -; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf2: -; AVX512DQ-BW-FCP: # %bb.0: -; AVX512DQ-BW-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512DQ-BW-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; AVX512DQ-BW-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7] -; AVX512DQ-BW-FCP-NEXT: vmovd %xmm1, (%rsi) -; AVX512DQ-BW-FCP-NEXT: vmovd %xmm0, (%rdx) -; AVX512DQ-BW-FCP-NEXT: retq %wide.vec = load <4 x i16>, ptr %in.vec, align 64 %strided.vec0 = shufflevector <4 x i16> %wide.vec, <4 x i16> poison, <2 x i32> %strided.vec1 = shufflevector <4 x i16> %wide.vec, <4 x i16> poison, <2 x i32> @@ -198,62 +135,6 @@ define void @load_i16_stride2_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou ; AVX512-NEXT: vpmovdw %xmm0, (%rsi) ; AVX512-NEXT: vmovq %xmm1, (%rdx) ; AVX512-NEXT: retq -; -; AVX512-FCP-LABEL: load_i16_stride2_vf4: -; AVX512-FCP: # %bb.0: -; AVX512-FCP-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; AVX512-FCP-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512-FCP-NEXT: vmovq %xmm1, (%rdx) -; AVX512-FCP-NEXT: retq -; -; AVX512DQ-LABEL: load_i16_stride2_vf4: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; AVX512DQ-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512DQ-NEXT: vmovq %xmm1, (%rdx) -; AVX512DQ-NEXT: retq -; -; AVX512DQ-FCP-LABEL: load_i16_stride2_vf4: -; AVX512DQ-FCP: # %bb.0: -; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; 
AVX512DQ-FCP-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512DQ-FCP-NEXT: vmovq %xmm1, (%rdx) -; AVX512DQ-FCP-NEXT: retq -; -; AVX512BW-LABEL: load_i16_stride2_vf4: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; AVX512BW-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512BW-NEXT: vmovq %xmm1, (%rdx) -; AVX512BW-NEXT: retq -; -; AVX512BW-FCP-LABEL: load_i16_stride2_vf4: -; AVX512BW-FCP: # %bb.0: -; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; AVX512BW-FCP-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512BW-FCP-NEXT: vmovq %xmm1, (%rdx) -; AVX512BW-FCP-NEXT: retq -; -; AVX512DQ-BW-LABEL: load_i16_stride2_vf4: -; AVX512DQ-BW: # %bb.0: -; AVX512DQ-BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; AVX512DQ-BW-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512DQ-BW-NEXT: vmovq %xmm1, (%rdx) -; AVX512DQ-BW-NEXT: retq -; -; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf4: -; AVX512DQ-BW-FCP: # %bb.0: -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u] -; AVX512DQ-BW-FCP-NEXT: vpmovdw %xmm0, (%rsi) -; AVX512DQ-BW-FCP-NEXT: vmovq %xmm1, (%rdx) -; AVX512DQ-BW-FCP-NEXT: retq %wide.vec = load <8 x i16>, ptr %in.vec, align 64 %strided.vec0 = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> %strided.vec1 = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> @@ -349,69 +230,6 @@ define void @load_i16_stride2_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou ; AVX512-NEXT: vpmovdw %ymm1, (%rdx) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq -; -; AVX512-FCP-LABEL: load_i16_stride2_vf8: -; AVX512-FCP: # %bb.0: -; AVX512-FCP-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512-FCP-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512-FCP-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512-FCP-NEXT: vpmovdw %ymm1, 
(%rdx) -; AVX512-FCP-NEXT: vzeroupper -; AVX512-FCP-NEXT: retq -; -; AVX512DQ-LABEL: load_i16_stride2_vf8: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512DQ-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512DQ-NEXT: vpmovdw %ymm1, (%rdx) -; AVX512DQ-NEXT: vzeroupper -; AVX512DQ-NEXT: retq -; -; AVX512DQ-FCP-LABEL: load_i16_stride2_vf8: -; AVX512DQ-FCP: # %bb.0: -; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512DQ-FCP-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512DQ-FCP-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512DQ-FCP-NEXT: vpmovdw %ymm1, (%rdx) -; AVX512DQ-FCP-NEXT: vzeroupper -; AVX512DQ-FCP-NEXT: retq -; -; AVX512BW-LABEL: load_i16_stride2_vf8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512BW-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512BW-NEXT: vpmovdw %ymm1, (%rdx) -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BW-FCP-LABEL: load_i16_stride2_vf8: -; AVX512BW-FCP: # %bb.0: -; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-FCP-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512BW-FCP-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512BW-FCP-NEXT: vpmovdw %ymm1, (%rdx) -; AVX512BW-FCP-NEXT: vzeroupper -; AVX512BW-FCP-NEXT: retq -; -; AVX512DQ-BW-LABEL: load_i16_stride2_vf8: -; AVX512DQ-BW: # %bb.0: -; AVX512DQ-BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512DQ-BW-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512DQ-BW-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512DQ-BW-NEXT: vpmovdw %ymm1, (%rdx) -; AVX512DQ-BW-NEXT: vzeroupper -; AVX512DQ-BW-NEXT: retq -; -; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf8: -; AVX512DQ-BW-FCP: # %bb.0: -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512DQ-BW-FCP-NEXT: vpsrld $16, %ymm0, %ymm1 -; AVX512DQ-BW-FCP-NEXT: vpmovdw %ymm0, (%rsi) -; AVX512DQ-BW-FCP-NEXT: vpmovdw %ymm1, (%rdx) -; AVX512DQ-BW-FCP-NEXT: vzeroupper -; AVX512DQ-BW-FCP-NEXT: retq %wide.vec = load <16 x i16>, ptr %in.vec, align 64 %strided.vec0 = shufflevector <16 x i16> %wide.vec, <16 
x i16> poison, <8 x i32> %strided.vec1 = shufflevector <16 x i16> %wide.vec, <16 x i16> poison, <8 x i32> @@ -544,69 +362,6 @@ define void @load_i16_stride2_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no ; AVX512-NEXT: vpmovdw %zmm1, (%rdx) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq -; -; AVX512-FCP-LABEL: load_i16_stride2_vf16: -; AVX512-FCP: # %bb.0: -; AVX512-FCP-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512-FCP-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512-FCP-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512-FCP-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512-FCP-NEXT: vzeroupper -; AVX512-FCP-NEXT: retq -; -; AVX512DQ-LABEL: load_i16_stride2_vf16: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512DQ-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512DQ-NEXT: vzeroupper -; AVX512DQ-NEXT: retq -; -; AVX512DQ-FCP-LABEL: load_i16_stride2_vf16: -; AVX512DQ-FCP: # %bb.0: -; AVX512DQ-FCP-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-FCP-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512DQ-FCP-NEXT: vzeroupper -; AVX512DQ-FCP-NEXT: retq -; -; AVX512BW-LABEL: load_i16_stride2_vf16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512BW-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BW-FCP-LABEL: load_i16_stride2_vf16: -; AVX512BW-FCP: # %bb.0: -; AVX512BW-FCP-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512BW-FCP-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512BW-FCP-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512BW-FCP-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512BW-FCP-NEXT: vzeroupper -; AVX512BW-FCP-NEXT: retq -; -; AVX512DQ-BW-LABEL: load_i16_stride2_vf16: -; AVX512DQ-BW: # %bb.0: -; AVX512DQ-BW-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512DQ-BW-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-BW-NEXT: vpmovdw %zmm0, (%rsi) 
-; AVX512DQ-BW-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512DQ-BW-NEXT: vzeroupper -; AVX512DQ-BW-NEXT: retq -; -; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf16: -; AVX512DQ-BW-FCP: # %bb.0: -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512DQ-BW-FCP-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-BW-FCP-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512DQ-BW-FCP-NEXT: vpmovdw %zmm1, (%rdx) -; AVX512DQ-BW-FCP-NEXT: vzeroupper -; AVX512DQ-BW-FCP-NEXT: retq %wide.vec = load <32 x i16>, ptr %in.vec, align 64 %strided.vec0 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <16 x i32> %strided.vec1 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <16 x i32> @@ -817,18 +572,18 @@ define void @load_i16_stride2_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no ; AVX2-FCP-NEXT: vzeroupper ; AVX2-FCP-NEXT: retq ; -; AVX512-LABEL: load_i16_stride2_vf32: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 -; AVX512-NEXT: vpsrld $16, %zmm0, %zmm2 -; AVX512-NEXT: vpsrld $16, %zmm1, %zmm3 -; AVX512-NEXT: vpmovdw %zmm1, 32(%rsi) -; AVX512-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512-NEXT: vpmovdw %zmm3, 32(%rdx) -; AVX512-NEXT: vpmovdw %zmm2, (%rdx) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512-VL-LABEL: load_i16_stride2_vf32: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1 +; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm2 +; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm3 +; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rsi) +; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi) +; AVX512-VL-NEXT: vpmovdw %zmm3, 32(%rdx) +; AVX512-VL-NEXT: vpmovdw %zmm2, (%rdx) +; AVX512-VL-NEXT: vzeroupper +; AVX512-VL-NEXT: retq ; ; AVX512-FCP-LABEL: load_i16_stride2_vf32: ; AVX512-FCP: # %bb.0: @@ -1344,27 +1099,27 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no ; AVX2-FCP-NEXT: vzeroupper ; AVX2-FCP-NEXT: retq ; -; AVX512-LABEL: load_i16_stride2_vf64: -; AVX512: # %bb.0: -; 
AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 -; AVX512-NEXT: vmovdqa64 128(%rdi), %zmm2 -; AVX512-NEXT: vmovdqa64 192(%rdi), %zmm3 -; AVX512-NEXT: vpmovdw %zmm1, %ymm4 -; AVX512-NEXT: vpsrld $16, %zmm1, %zmm1 -; AVX512-NEXT: vpsrld $16, %zmm0, %zmm5 -; AVX512-NEXT: vpsrld $16, %zmm3, %zmm6 -; AVX512-NEXT: vpsrld $16, %zmm2, %zmm7 -; AVX512-NEXT: vpmovdw %zmm0, (%rsi) -; AVX512-NEXT: vmovdqa %ymm4, 32(%rsi) -; AVX512-NEXT: vpmovdw %zmm2, 64(%rsi) -; AVX512-NEXT: vpmovdw %zmm3, 96(%rsi) -; AVX512-NEXT: vpmovdw %zmm7, 64(%rdx) -; AVX512-NEXT: vpmovdw %zmm6, 96(%rdx) -; AVX512-NEXT: vpmovdw %zmm5, (%rdx) -; AVX512-NEXT: vpmovdw %zmm1, 32(%rdx) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512-VL-LABEL: load_i16_stride2_vf64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1 +; AVX512-VL-NEXT: vmovdqa64 128(%rdi), %zmm2 +; AVX512-VL-NEXT: vmovdqa64 192(%rdi), %zmm3 +; AVX512-VL-NEXT: vpmovdw %zmm1, %ymm4 +; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm1 +; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm5 +; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm6 +; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm7 +; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi) +; AVX512-VL-NEXT: vmovdqa %ymm4, 32(%rsi) +; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi) +; AVX512-VL-NEXT: vpmovdw %zmm3, 96(%rsi) +; AVX512-VL-NEXT: vpmovdw %zmm7, 64(%rdx) +; AVX512-VL-NEXT: vpmovdw %zmm6, 96(%rdx) +; AVX512-VL-NEXT: vpmovdw %zmm5, (%rdx) +; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rdx) +; AVX512-VL-NEXT: vzeroupper +; AVX512-VL-NEXT: retq ; ; AVX512-FCP-LABEL: load_i16_stride2_vf64: ; AVX512-FCP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index c71a96f704ac3..b214bf082f235 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -413,12 +413,9 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; 
AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2 -; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3 -; AVX512F-NEXT: vucomiss %xmm3, %xmm2 -; AVX512F-NEXT: seta %al -; AVX512F-NEXT: negb %al -; AVX512F-NEXT: kmovd %eax, %k1 +; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm2 +; AVX512F-NEXT: vcvtph2ps %xmm1, %ymm3 +; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k1 ; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -427,14 +424,12 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512VL-LABEL: test_v2f16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2 -; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3 -; AVX512VL-NEXT: vucomiss %xmm3, %xmm2 -; AVX512VL-NEXT: seta %al -; AVX512VL-NEXT: negb %al -; AVX512VL-NEXT: kmovd %eax, %k1 +; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2 +; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3 +; AVX512VL-NEXT: vcmpltps %ymm2, %ymm3, %k1 ; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} ; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512FP16-LABEL: test_v2f16: diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 2dffe2bf0dfa1..9f37df716b6cd 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -412,12 +412,9 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2 -; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3 -; AVX512F-NEXT: xorl %eax, %eax -; AVX512F-NEXT: vucomiss %xmm3, %xmm2 -; AVX512F-NEXT: sbbl %eax, %eax -; AVX512F-NEXT: kmovd %eax, %k1 +; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm2 +; AVX512F-NEXT: vcvtph2ps %xmm1, %ymm3 +; AVX512F-NEXT: 
vcmpltps %zmm3, %zmm2, %k1 ; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -426,14 +423,12 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512VL-LABEL: test_v2f16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2 -; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3 -; AVX512VL-NEXT: xorl %eax, %eax -; AVX512VL-NEXT: vucomiss %xmm3, %xmm2 -; AVX512VL-NEXT: sbbl %eax, %eax -; AVX512VL-NEXT: kmovd %eax, %k1 +; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2 +; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3 +; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1 ; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} ; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512FP16-LABEL: test_v2f16: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index 6360c68e62cc9..6fe16f85ec6be 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -2516,10 +2516,8 @@ define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, ptr %a1) { ; ; AVX512VL-LABEL: shuffle_mem_v4f32_0624: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovaps (%rdi), %xmm2 -; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4] -; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1 -; AVX512VL-NEXT: vmovaps %xmm1, %xmm0 +; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [4,2,6,0] +; AVX512VL-NEXT: vpermt2ps (%rdi), %xmm1, %xmm0 ; AVX512VL-NEXT: retq %1 = load <4 x float>, ptr %a1 %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll index 8cc20ec3c1a7e..3fd73319e8577 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -339,7 +339,7 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){ ; AVX512F: # 
%bb.0: # %entry ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 ^ (zmm2 & (zmm0 ^ zmm1)) ; AVX512F-NEXT: ret{{[l|q]}} entry: %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> @@ -354,15 +354,10 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){ ; AVX512-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; AVX512-NEXT: ret{{[l|q]}} ; -; X86-AVX512F-LABEL: test_mm512_mask_blend_epi16: -; X86-AVX512F: # %bb.0: # %entry -; X86-AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm1, %zmm0 -; X86-AVX512F-NEXT: retl -; -; X64-AVX512F-LABEL: test_mm512_mask_blend_epi16: -; X64-AVX512F: # %bb.0: # %entry -; X64-AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 -; X64-AVX512F-NEXT: retq +; AVX512F-LABEL: test_mm512_mask_blend_epi16: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1)) +; AVX512F-NEXT: ret{{[l|q]}} entry: %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> ret <32 x i16> %0 @@ -486,18 +481,14 @@ define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) { ; X86-AVX512-LABEL: test_masked_permps_v8f32: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512-NEXT: vmovaps (%eax), %ymm2 -; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15] -; X86-AVX512-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1 -; X86-AVX512-NEXT: vmovaps %ymm1, %ymm0 +; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [15,14,11,3,15,14,6,7] +; X86-AVX512-NEXT: vpermt2ps (%eax), %ymm1, %ymm0 ; X86-AVX512-NEXT: retl ; ; X64-AVX512-LABEL: test_masked_permps_v8f32: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovaps (%rdi), %ymm2 -; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = 
[7,6,3,11,7,6,14,15] -; X64-AVX512-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1 -; X64-AVX512-NEXT: vmovaps %ymm1, %ymm0 +; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [15,14,11,3,15,14,6,7] +; X64-AVX512-NEXT: vpermt2ps (%rdi), %ymm1, %ymm0 ; X64-AVX512-NEXT: retq ; ; X86-AVX512F-LABEL: test_masked_permps_v8f32: @@ -505,18 +496,18 @@ define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) { ; X86-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX512F-NEXT: vmovaps (%eax), %ymm1 -; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] -; X86-AVX512F-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 -; X86-AVX512F-NEXT: vmovaps %ymm1, %ymm0 +; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [23,22,19,3,23,22,6,7] +; X86-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 +; X86-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; X86-AVX512F-NEXT: retl ; ; X64-AVX512F-LABEL: test_masked_permps_v8f32: ; X64-AVX512F: # %bb.0: ; X64-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; X64-AVX512F-NEXT: vmovaps (%rdi), %ymm1 -; X64-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] -; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 -; X64-AVX512F-NEXT: vmovaps %ymm1, %ymm0 +; X64-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [23,22,19,3,23,22,6,7] +; X64-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 +; X64-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; X64-AVX512F-NEXT: retq %vec = load <8 x float>, ptr %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> @@ -528,35 +519,27 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) { ; X86-AVX512-LABEL: test_masked_permps_v16f32: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512-NEXT: vmovaps (%eax), %zmm2 -; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X86-AVX512-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X86-AVX512-NEXT: vmovaps %zmm1, 
%zmm0 +; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X86-AVX512-NEXT: vpermt2ps (%eax), %zmm1, %zmm0 ; X86-AVX512-NEXT: retl ; ; X64-AVX512-LABEL: test_masked_permps_v16f32: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovaps (%rdi), %zmm2 -; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X64-AVX512-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X64-AVX512-NEXT: vmovaps %zmm1, %zmm0 +; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X64-AVX512-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0 ; X64-AVX512-NEXT: retq ; ; X86-AVX512F-LABEL: test_masked_permps_v16f32: ; X86-AVX512F: # %bb.0: ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512F-NEXT: vmovaps (%eax), %zmm2 -; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X86-AVX512F-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X86-AVX512F-NEXT: vmovaps %zmm1, %zmm0 +; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X86-AVX512F-NEXT: vpermt2ps (%eax), %zmm1, %zmm0 ; X86-AVX512F-NEXT: retl ; ; X64-AVX512F-LABEL: test_masked_permps_v16f32: ; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovaps (%rdi), %zmm2 -; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X64-AVX512F-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X64-AVX512F-NEXT: vmovaps %zmm1, %zmm0 +; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X64-AVX512F-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0 ; X64-AVX512F-NEXT: retq %vec = load <16 x float>, ptr %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index 6f9b3e94aa68f..2b89590a0bb41 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -719,10 +719,9 @@ define i8 
@shuf8i1__9_6_1_10_3_7_7_1(i8 %a) { ; AVX512F: # %bb.0: ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1 -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1] -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,0,0,0,0] -; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2 -; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0 +; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [1,14,9,8,11,15,15,9] +; AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: # kill: def $al killed $al killed $eax ; AVX512F-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v48.ll b/llvm/test/CodeGen/X86/vector-shuffle-v48.ll index ed9f849d35d00..0efbe018764d2 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v48.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v48.ll @@ -79,10 +79,9 @@ define <32 x i8> @foo(ptr %x0) { ; ; AVX512VBMI-LABEL: foo: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 -; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm2 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,33,34,36,37,39,40,42,43,45,46] -; AVX512VBMI-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 +; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm1 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [32,33,35,36,38,39,41,42,44,45,47,48,50,51,53,54,56,57,59,60,62,63,1,2,4,5,7,8,10,11,13,14] +; AVX512VBMI-NEXT: vpermi2b (%rdi), %ymm1, %ymm0 ; AVX512VBMI-NEXT: retq %1 = load <48 x i8>, ptr %x0, align 1 %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <32 x i32> diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll index ac267544f0c0e..181f5651784d8 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll +++ 
b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll @@ -4895,11 +4895,10 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i ; ; AVX512BW-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,0,2,0,8,0,6,0] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,0,10,0,0,0,14,0] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper @@ -4997,11 +4996,10 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; ; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,32,3,32,5,32,7,32,9,32,11,32,13,32,15,32,17,32,19,32,21,32,23,32,25,32,27,32,29,32,31] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63] -; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5411,13 +5409,12 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. 
; ; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15] ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31] -; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512F-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512F-NEXT: vzeroupper @@ -5425,13 +5422,12 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; ; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15] ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31] -; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512DQ-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512DQ-NEXT: vzeroupper @@ -5439,11 +5435,10 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. 
; ; AVX512BW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31] -; AVX512BW-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5679,13 +5674,12 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7] ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15] -; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512F-NEXT: vzeroupper @@ -5693,13 +5687,12 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7] ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15] -; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 
+; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512DQ-NEXT: vzeroupper @@ -5707,11 +5700,10 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512BW-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5938,13 +5930,12 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr % ; ; AVX512F-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7] ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15] -; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512F-NEXT: vzeroupper @@ -5952,13 +5943,12 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr % ; ; 
AVX512DQ-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7] ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15] -; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512DQ-NEXT: vzeroupper @@ -5966,11 +5956,10 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr % ; ; AVX512BW-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll index 9b13897293dc1..c5995bbc479a6 100644 --- a/llvm/test/CodeGen/Xtensa/mul.ll +++ b/llvm/test/CodeGen/Xtensa/mul.ll @@ -4,7 +4,8 @@ define signext i32 @square(i32 %a) nounwind { ; XTENSA-LABEL: square: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a8, .LCPI0_0 @@ -20,7 +21,8 @@ define signext i32 @square(i32 %a) nounwind { 
define signext i32 @mul(i32 %a, i32 %b) nounwind { ; XTENSA-LABEL: mul: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a8, .LCPI1_0 @@ -35,7 +37,8 @@ define signext i32 @mul(i32 %a, i32 %b) nounwind { define signext i32 @mul_constant(i32 %a) nounwind { ; XTENSA-LABEL: mul_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 5 @@ -51,7 +54,8 @@ define signext i32 @mul_constant(i32 %a) nounwind { define i32 @mul_pow2(i32 %a) nounwind { ; XTENSA-LABEL: mul_pow2: -; XTENSA: slli a2, a2, 3 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 3 ; XTENSA-NEXT: ret %1 = mul i32 %a, 8 ret i32 %1 @@ -59,7 +63,8 @@ define i32 @mul_pow2(i32 %a) nounwind { define i64 @mul64(i64 %a, i64 %b) nounwind { ; XTENSA-LABEL: mul64: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a8, .LCPI4_0 @@ -74,7 +79,8 @@ define i64 @mul64(i64 %a, i64 %b) nounwind { define i64 @mul64_constant(i64 %a) nounwind { ; XTENSA-LABEL: mul64_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 5 @@ -91,7 +97,8 @@ define i64 @mul64_constant(i64 %a) nounwind { define i32 @mulhs(i32 %a, i32 %b) nounwind { ; XTENSA-LABEL: mulhs: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: or a4, a3, a3 @@ -114,7 +121,8 @@ define i32 @mulhs(i32 %a, i32 %b) nounwind { define i32 @mulhs_positive_constant(i32 %a) nounwind { ; XTENSA-LABEL: 
mulhs_positive_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: srai a3, a2, 31 @@ -136,7 +144,8 @@ define i32 @mulhs_positive_constant(i32 %a) nounwind { define i32 @mulhs_negative_constant(i32 %a) nounwind { ; XTENSA-LABEL: mulhs_negative_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: srai a3, a2, 31 @@ -158,7 +167,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind { ; XTENSA-LABEL: mulhu: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: or a4, a3, a3 @@ -181,7 +191,8 @@ define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind { define i32 @mulhsu(i32 %a, i32 %b) nounwind { ; XTENSA-LABEL: mulhsu: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: or a4, a3, a3 @@ -204,7 +215,8 @@ define i32 @mulhsu(i32 %a, i32 %b) nounwind { define i32 @mulhu_constant(i32 %a) nounwind { ; XTENSA-LABEL: mulhu_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 5 @@ -226,7 +238,8 @@ define i32 @mulhu_constant(i32 %a) nounwind { define i32 @muli32_p65(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 65 @@ -242,7 +255,8 @@ define i32 @muli32_p65(i32 %a) 
nounwind { define i32 @muli32_p63(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 63 @@ -258,7 +272,8 @@ define i32 @muli32_p63(i32 %a) nounwind { define i64 @muli64_p65(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 65 @@ -275,7 +290,8 @@ define i64 @muli64_p65(i64 %a) nounwind { define i64 @muli64_p63(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 63 @@ -292,7 +308,8 @@ define i64 @muli64_p63(i64 %a) nounwind { define i32 @muli32_m63(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, -63 @@ -308,7 +325,8 @@ define i32 @muli32_m63(i32 %a) nounwind { define i32 @muli32_m65(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, -65 @@ -324,7 +342,8 @@ define i32 @muli32_m65(i32 %a) nounwind { define i64 @muli64_m63(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, -63 @@ -341,7 +360,8 @@ define i64 @muli64_m63(i64 %a) nounwind { define i64 @muli64_m65(i64 %a) nounwind { ; XTENSA-LABEL: 
muli64_m65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, -65 @@ -358,7 +378,8 @@ define i64 @muli64_m65(i64 %a) nounwind { define i32 @muli32_p384(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p384: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 384 @@ -374,7 +395,8 @@ define i32 @muli32_p384(i32 %a) nounwind { define i32 @muli32_p12288(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p12288: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI21_0 @@ -390,7 +412,8 @@ define i32 @muli32_p12288(i32 %a) nounwind { define i32 @muli32_p4352(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI22_0 @@ -406,7 +429,8 @@ define i32 @muli32_p4352(i32 %a) nounwind { define i32 @muli32_p3840(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI23_0 @@ -422,7 +446,8 @@ define i32 @muli32_p3840(i32 %a) nounwind { define i32 @muli32_m3840(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI24_0 @@ -438,7 +463,8 @@ define i32 @muli32_m3840(i32 %a) nounwind { define i32 @muli32_m4352(i32 %a) nounwind { ; XTENSA-LABEL: 
muli32_m4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI25_0 @@ -454,7 +480,8 @@ define i32 @muli32_m4352(i32 %a) nounwind { define i64 @muli64_p4352(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI26_0 @@ -471,7 +498,8 @@ define i64 @muli64_p4352(i64 %a) nounwind { define i64 @muli64_p3840(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI27_0 @@ -488,7 +516,8 @@ define i64 @muli64_p3840(i64 %a) nounwind { define i64 @muli64_m4352(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI28_0 @@ -505,7 +534,8 @@ define i64 @muli64_m4352(i64 %a) nounwind { define i64 @muli64_m3840(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI29_0 @@ -522,17 +552,123 @@ define i64 @muli64_m3840(i64 %a) nounwind { define i128 @muli128_m3840(i128 %a) nounwind { ; XTENSA-LABEL: muli128_m3840: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill -; XTENSA-NEXT: movi a7, -1 -; XTENSA-NEXT: s32i a7, a1, 4 -; XTENSA-NEXT: s32i a7, a1, 0 -; XTENSA-NEXT: l32r a6, .LCPI30_0 -; XTENSA-NEXT: l32r a8, .LCPI30_1 -; XTENSA-NEXT: callx0 a8 -; 
XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -80 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 64 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a12, a1, 60 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 56 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 52 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 48 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a5, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a4, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: or a15, a3, a3 +; XTENSA-NEXT: l32r a14, .LCPI30_0 +; XTENSA-NEXT: movi a12, 0 +; XTENSA-NEXT: l32r a13, .LCPI30_1 +; XTENSA-NEXT: s32i a2, a1, 36 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 28 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 40 # 4-byte Folded Spill +; XTENSA-NEXT: or a2, a15, a15 +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: s32i a14, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a2, a8 +; XTENSA-NEXT: movi a8, 1 +; XTENSA-NEXT: s32i a8, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a15, a2, .LBB30_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB30_2: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: s32i a8, a1, 32 # 4-byte Folded Spill +; XTENSA-NEXT: movi a14, -1 +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: add a9, a2, a15 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: s32i a9, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a9, a2, .LBB30_4 +; 
XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB30_4: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: l32i a9, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a2, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: add a15, a2, a15 +; XTENSA-NEXT: l32i a2, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a4, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a2, a8 +; XTENSA-NEXT: add a4, a15, a9 +; XTENSA-NEXT: l32i a7, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: or a8, a7, a7 +; XTENSA-NEXT: bltu a4, a15, .LBB30_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB30_6: +; XTENSA-NEXT: or a10, a7, a7 +; XTENSA-NEXT: l32i a11, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a15, a11, .LBB30_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a12, a12 +; XTENSA-NEXT: .LBB30_8: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: l32i a6, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a5, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a5, a6, .LBB30_10 +; XTENSA-NEXT: # %bb.9: +; XTENSA-NEXT: or a11, a12, a12 +; XTENSA-NEXT: .LBB30_10: +; XTENSA-NEXT: l32i a6, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: add a11, a6, a11 +; 
XTENSA-NEXT: add a10, a11, a10 +; XTENSA-NEXT: bltu a9, a2, .LBB30_12 +; XTENSA-NEXT: # %bb.11: +; XTENSA-NEXT: or a7, a12, a12 +; XTENSA-NEXT: .LBB30_12: +; XTENSA-NEXT: l32i a9, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a3, a9 +; XTENSA-NEXT: add a9, a9, a7 +; XTENSA-NEXT: add a9, a10, a9 +; XTENSA-NEXT: add a5, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a15, a1, 48 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 52 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 56 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 60 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a0, a1, 64 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 80 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %1 = mul i128 %a, -3840 @@ -541,17 +677,123 @@ define i128 @muli128_m3840(i128 %a) nounwind { define i128 @muli128_m63(i128 %a) nounwind { ; XTENSA-LABEL: muli128_m63: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill -; XTENSA-NEXT: movi a7, -1 -; XTENSA-NEXT: s32i a7, a1, 4 -; XTENSA-NEXT: s32i a7, a1, 0 -; XTENSA-NEXT: movi a6, -63 -; XTENSA-NEXT: l32r a8, .LCPI31_0 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -80 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 64 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a12, a1, 60 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 56 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 52 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 48 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a5, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a4, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: or a15, a3, a3 +; XTENSA-NEXT: movi a14, -63 +; XTENSA-NEXT: movi a12, 0 +; XTENSA-NEXT: l32r a13, .LCPI31_0 +; XTENSA-NEXT: s32i a2, a1, 36 # 4-byte Folded Spill +; 
XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 28 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 40 # 4-byte Folded Spill +; XTENSA-NEXT: or a2, a15, a15 +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: s32i a14, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a2, a8 +; XTENSA-NEXT: movi a8, 1 +; XTENSA-NEXT: s32i a8, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a15, a2, .LBB31_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB31_2: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: s32i a8, a1, 32 # 4-byte Folded Spill +; XTENSA-NEXT: movi a14, -1 +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: add a9, a2, a15 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: s32i a9, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a9, a2, .LBB31_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB31_4: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: l32i a9, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a2, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: add a15, a2, a15 +; XTENSA-NEXT: l32i a2, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a4, a1, 12 # 4-byte Folded Reload +; 
XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a2, a8 +; XTENSA-NEXT: add a4, a15, a9 +; XTENSA-NEXT: l32i a7, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: or a8, a7, a7 +; XTENSA-NEXT: bltu a4, a15, .LBB31_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB31_6: +; XTENSA-NEXT: or a10, a7, a7 +; XTENSA-NEXT: l32i a11, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a15, a11, .LBB31_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a12, a12 +; XTENSA-NEXT: .LBB31_8: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: l32i a6, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a5, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a5, a6, .LBB31_10 +; XTENSA-NEXT: # %bb.9: +; XTENSA-NEXT: or a11, a12, a12 +; XTENSA-NEXT: .LBB31_10: +; XTENSA-NEXT: l32i a6, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: add a11, a6, a11 +; XTENSA-NEXT: add a10, a11, a10 +; XTENSA-NEXT: bltu a9, a2, .LBB31_12 +; XTENSA-NEXT: # %bb.11: +; XTENSA-NEXT: or a7, a12, a12 +; XTENSA-NEXT: .LBB31_12: +; XTENSA-NEXT: l32i a9, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a3, a9 +; XTENSA-NEXT: add a9, a9, a7 +; XTENSA-NEXT: add a9, a10, a9 +; XTENSA-NEXT: add a5, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a15, a1, 48 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 52 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 56 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 60 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a0, a1, 64 # 4-byte Folded Reload +; XTENSA-NEXT: addi 
a8, a1, 80 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %1 = mul i128 %a, -63 @@ -560,22 +802,119 @@ define i128 @muli128_m63(i128 %a) nounwind { define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; XTENSA-LABEL: mulhsu_i64: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill -; XTENSA-NEXT: or a7, a5, a5 -; XTENSA-NEXT: or a6, a4, a4 -; XTENSA-NEXT: srai a8, a7, 31 -; XTENSA-NEXT: s32i a8, a1, 4 -; XTENSA-NEXT: s32i a8, a1, 0 -; XTENSA-NEXT: movi a4, 0 -; XTENSA-NEXT: l32r a8, .LCPI32_0 -; XTENSA-NEXT: or a5, a4, a4 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: or a2, a4, a4 -; XTENSA-NEXT: or a3, a5, a5 -; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -64 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 56 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a12, a1, 52 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 48 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 40 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a5, a1, 28 # 4-byte Folded Spill +; XTENSA-NEXT: or a14, a4, a4 +; XTENSA-NEXT: or a15, a3, a3 +; XTENSA-NEXT: movi a12, 0 +; XTENSA-NEXT: l32r a13, .LCPI32_0 +; XTENSA-NEXT: s32i a2, a1, 32 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 36 # 4-byte Folded Spill +; XTENSA-NEXT: or a2, a15, a15 +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: s32i a14, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: add a14, a2, a8 +; XTENSA-NEXT: movi a15, 1 +; XTENSA-NEXT: or a8, a15, a15 +; XTENSA-NEXT: bltu a14, a2, .LBB32_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a8, a12, a12 
+; XTENSA-NEXT: .LBB32_2: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: s32i a8, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: l32i a2, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: l32i a4, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: add a9, a2, a14 +; XTENSA-NEXT: s32i a15, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: or a8, a15, a15 +; XTENSA-NEXT: bltu a9, a2, .LBB32_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB32_4: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: l32i a9, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: add a14, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: l32i a15, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: or a4, a15, a15 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a2, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: add a14, a2, a14 +; XTENSA-NEXT: l32i a2, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a15, a15 +; XTENSA-NEXT: or a4, a12, a12 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: srai a2, a15, 31 +; XTENSA-NEXT: or a3, a2, a2 +; XTENSA-NEXT: l32i a4, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a5, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: or a8, a2, a2 +; XTENSA-NEXT: l32i a9, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: add a10, a8, a9 +; XTENSA-NEXT: add a2, a14, a10 +; XTENSA-NEXT: l32i a6, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: or a9, a6, a6 +; XTENSA-NEXT: bltu a2, a14, .LBB32_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a9, a12, a12 +; XTENSA-NEXT: .LBB32_6: +; XTENSA-NEXT: or a11, a6, a6 +; XTENSA-NEXT: l32i a7, a1, 
4 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a14, a7, .LBB32_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a11, a12, a12 +; XTENSA-NEXT: .LBB32_8: +; XTENSA-NEXT: or a7, a6, a6 +; XTENSA-NEXT: l32i a5, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a4, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a4, a5, .LBB32_10 +; XTENSA-NEXT: # %bb.9: +; XTENSA-NEXT: or a7, a12, a12 +; XTENSA-NEXT: .LBB32_10: +; XTENSA-NEXT: l32i a5, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: add a7, a5, a7 +; XTENSA-NEXT: add a11, a7, a11 +; XTENSA-NEXT: bltu a10, a8, .LBB32_12 +; XTENSA-NEXT: # %bb.11: +; XTENSA-NEXT: or a6, a12, a12 +; XTENSA-NEXT: .LBB32_12: +; XTENSA-NEXT: l32i a8, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: add a8, a8, a6 +; XTENSA-NEXT: add a8, a11, a8 +; XTENSA-NEXT: add a3, a8, a9 +; XTENSA-NEXT: l32i a15, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 48 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 52 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a0, a1, 56 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 64 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %1 = zext i64 %a to i128 @@ -588,7 +927,8 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { define i8 @muladd_demand(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: muladd_demand: -; XTENSA: slli a8, a2, 1 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a8, a2, 1 ; XTENSA-NEXT: sub a8, a3, a8 ; XTENSA-NEXT: movi a9, 15 ; XTENSA-NEXT: and a2, a8, a9 @@ -601,7 +941,8 @@ define i8 @muladd_demand(i8 %x, i8 %y) nounwind { define i8 @mulsub_demand(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: mulsub_demand: -; XTENSA: addx2 a8, a2, a3 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addx2 a8, a2, a3 ; XTENSA-NEXT: movi a9, 15 ; XTENSA-NEXT: and a2, a8, a9 ; XTENSA-NEXT: ret @@ -613,7 +954,8 @@ define i8 @mulsub_demand(i8 %x, i8 %y) nounwind { define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: muladd_demand_2: -; 
XTENSA: slli a8, a2, 1 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a8, a2, 1 ; XTENSA-NEXT: sub a8, a3, a8 ; XTENSA-NEXT: movi a9, -16 ; XTENSA-NEXT: or a2, a8, a9 @@ -626,7 +968,8 @@ define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind { define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: mulsub_demand_2: -; XTENSA: addx2 a8, a2, a3 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addx2 a8, a2, a3 ; XTENSA-NEXT: movi a9, -16 ; XTENSA-NEXT: or a2, a8, a9 ; XTENSA-NEXT: ret @@ -638,7 +981,8 @@ define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { define signext i32 @mul_imm_2(i32 %a) nounwind { ; XTENSA-LABEL: mul_imm_2: -; XTENSA: slli a2, a2, 1 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 1 ; XTENSA-NEXT: ret %1 = mul i32 %a, 2 ret i32 %1 @@ -646,7 +990,8 @@ define signext i32 @mul_imm_2(i32 %a) nounwind { define signext i32 @mul_imm_1024(i32 %a) nounwind { ; XTENSA-LABEL: mul_imm_1024: -; XTENSA: slli a2, a2, 10 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 10 ; XTENSA-NEXT: ret %1 = mul i32 %a, 1024 ret i32 %1 @@ -654,7 +999,8 @@ define signext i32 @mul_imm_1024(i32 %a) nounwind { define signext i32 @mul_imm_16384(i32 %a) nounwind { ; XTENSA-LABEL: mul_imm_16384: -; XTENSA: slli a2, a2, 14 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 14 ; XTENSA-NEXT: ret %1 = mul i32 %a, 16384 ret i32 %1 @@ -662,7 +1008,9 @@ define signext i32 @mul_imm_16384(i32 %a) nounwind { define <4 x i32> @mul_vec_splat_constant(<4 x i32> %a) { ; XTENSA-LABEL: mul_vec_splat_constant: -; XTENSA: slli a2, a2, 2 +; XTENSA: .cfi_startproc +; XTENSA-NEXT: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 2 ; XTENSA-NEXT: slli a3, a3, 2 ; XTENSA-NEXT: slli a4, a4, 2 ; XTENSA-NEXT: slli a5, a5, 2 diff --git a/llvm/test/Instrumentation/BoundsChecking/negative.ll b/llvm/test/Instrumentation/BoundsChecking/negative.ll new file mode 100644 index 0000000000000..d8fb117bd13af --- /dev/null +++ b/llvm/test/Instrumentation/BoundsChecking/negative.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_test_checks.py +; Check that negative oob gep do not generate invalid check. +; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + + +@str = global [100 x i8] zeroinitializer, align 1 + +define i16 @main() { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 65, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TMP4:%.*]] ] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i8 [[I_0]], 76 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[TMP4]] +; CHECK: for.inc: +; CHECK-NEXT: [[I_0_C:%.*]] = sext i8 [[I_0]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 -65, [[I_0_C]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr getelementptr (i8, ptr @str, i8 -65), i8 [[I_0]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 100, [[TMP0]] +; CHECK-NEXT: store i8 [[I_0]], ptr [[GEP]], align 1 +; CHECK-NEXT: [[INC]] = add nuw nsw i8 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret i16 0 +; +entry: + br label %for.cond + +for.cond: + %i.0 = phi i8 [ 65, %entry ], [ %inc, %for.inc ] + %exitcond.not = icmp eq i8 %i.0, 76 + br i1 %exitcond.not, label %for.end, label %for.inc + +for.inc: ; preds = %for.cond + %gep = getelementptr i8, ptr getelementptr (i8, ptr @str, i8 -65), i8 %i.0 + store i8 %i.0, ptr %gep, align 1 + %inc = add nuw nsw i8 %i.0, 1 + br label %for.cond + +for.end: + ret i16 0 +} + diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index fd436fe9fe0dd..bb1840eb849df 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s 
--check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=CI,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s index efeaf8339f692..40ea9bb3678d9 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s @@ -2636,16 +2636,16 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 th:TH_ATOMIC_CASCA // GFX12: encoding: [0x03,0x80,0x16,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f] buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, 
s[8:11], s3 offset:8388607 glc // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx950-unsupported.s b/llvm/test/MC/AMDGPU/gfx950-unsupported.s index f8bbd40b700fd..225784177ae18 100644 --- a/llvm/test/MC/AMDGPU/gfx950-unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx950-unsupported.s @@ -1,4 +1,5 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck -check-prefix=ERR %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=W32-ERR %s //===----------------------------------------------------------------------===// // v_mfma_f32_32x32x4_xf32 @@ -177,3 +178,79 @@ v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7] v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7] // ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +//===----------------------------------------------------------------------===// +// ds_read_b64_tr_b4 +//===----------------------------------------------------------------------===// +ds_read_b64_tr_b4 v[1:2], v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b4 v1, v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b4 v[0:1], s0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b4 v[2:3], v2 offset:-64 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 16-bit unsigned offset +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +//===----------------------------------------------------------------------===// +//ds_read_b64_tr_b8 
+//===----------------------------------------------------------------------===// +ds_read_b64_tr_b8 v[1:2], v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b8 v1, v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b8 v[0:1], s0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b8 v[2:3], v2 offset:-64 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 16-bit unsigned offset +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +//===----------------------------------------------------------------------===// +// ds_read_b64_tr_b16 +//===----------------------------------------------------------------------===// +ds_read_b64_tr_b16 v[1:2], v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b16 v1, v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b16 v[0:1], s0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b64_tr_b16 v[2:3], v2 offset:-64 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 16-bit unsigned offset +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +//===----------------------------------------------------------------------===// +// ds_read_b96_tr_b6 
+//===----------------------------------------------------------------------===// +ds_read_b96_tr_b6 v[1:3], v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b96_tr_b6 v1, v0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b96_tr_b6 v[0:3], s0 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_read_b96_tr_b6 v[2:4], v2 offset:-64 +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 16-bit unsigned offset +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 405d152c93d86..75022d8cf0cdd 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -35,3 +35,1247 @@ global_load_lds_dwordx4 v[2:3], off offset:4 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: global_load_lds_dwordx4 v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xf4,0xdd,0x02,0x00,0x04,0x00] global_load_lds_dwordx4 v2, s[4:5] offset:4 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e] +v_permlane16_swap_b32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e] +v_permlane16_swap_b32_e32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32_e64 v1, v2 + +// FIXME: Parsed as bound_ctrl:1? 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 bound_ctrl:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 fi:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 bound_ctrl:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 bound_ctrl:1 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 + +// FIXME: Swapped order not accepted +// v_permlane16_swap_b32 v1, v2 fi:1 bound_ctrl:1 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e] +v_permlane32_swap_b32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e] +v_permlane32_swap_b32_e32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32_e64 v1, v2 + +// FIXME: Parsed as bound_ctrl:1? 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 bound_ctrl:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 fi:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 bound_ctrl:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 bound_ctrl:1 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 + +// FIXME: Swapped order not accepted +// v_permlane32_swap_b32 v1, v2 fi:1 bound_ctrl:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: 
v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s1, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: 
[0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 11, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] + 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 
op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s1, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 11, v3 + +// 
NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 
op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3 ; encoding: [0x01,0x00,0x4b,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s1, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 
v1, s4, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s1, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3 ; encoding: [0x01,0x00,0x4b,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 11, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 11, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: 
v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x96,0x06,0x02,0x00] + v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00] + v_cvt_scalef32_f32_bf8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: 
[0x01,0x50,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s1, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x01,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s1, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x03,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x04,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,1] + 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 11, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0xa1,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x8b,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 11, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x96,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0xa1,0x06,0x02,0x00] + v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00] +v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| ; 
encoding: [0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: 
[0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_bf8 
v[2:3], v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: 
v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 + +// 
NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20] +v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: 
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], 
v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 
op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: 
[0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: 
[0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, 
v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00] +v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00] +v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00] +v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00] +v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x62,0xd2,0x14,0x11,0x02,0x00] +v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x61,0xd2,0x14,0x11,0x02,0x00] +v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x60,0xd2,0x14,0x11,0x02,0x00] +v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 + +// 
NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_bf6_bf16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x5b,0xd2,0x0a,0x11,0x02,0x00] +v_cvt_scalef32_pk32_bf6_bf16 v[20:25], v[10:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x5a,0xd2,0x0a,0x11,0x02,0x00] +v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x59,0xd2,0x0a,0x11,0x02,0x00] +v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00] +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] 
; encoding: [0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: 
v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 ; encoding: 
[0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20] +v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 ; encoding: 
[0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20] +v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 ; encoding: [0x14,0x00,0x52,0xd2,0x0a,0x15,0x1a,0x04] +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x1a,0x04] +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], s6 ; encoding: [0x14,0x00,0x52,0xd2,0x0a,0x15,0x1a,0x00] +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], s6 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], s6 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x1a,0x00] +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], s6 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], 22 ; encoding: [0x14,0x00,0x52,0xd2,0x0a,0x15,0x5a,0x02] +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], 22 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02] +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03] +buffer_atomic_pk_add_bf16 v255, off, 
s[8:11], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 
offset:7 ; encoding: [0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04] +v_maximum3_f32 v1, v2, v3, v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4] +v_maximum3_f32 v1, -v2, -v3, -v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4] +v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04] +v_maximum3_f32 v1, 0.0, 1.0, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03] +v_maximum3_f32 v2, 0.0, v3, 1.0 + +// NOT-GFX950: 
:[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03] +v_maximum3_f32 v1, s8, v3, 1.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04] +v_maximum3_f32 v1, v2, s8, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04] +v_minimum3_f32 v0, v1, v2, v3 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c] +v_pk_minimum3_f16 v1, v2, v3, v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b] +v_pk_minimum3_f16 v1, v2, v3, 2.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c] +v_pk_minimum3_f16 v1, v2, 2.0, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c] +v_pk_minimum3_f16 v1, 2.0, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c] +v_pk_minimum3_f16 v1, v2, v3, v4 clamp + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s8, v1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18] +v_pk_minimum3_f16 v8, v0, v1, s8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s0, v1 
neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04] +v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c] +v_pk_maximum3_f16 v1, v2, v3, v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b] +v_pk_maximum3_f16 v1, v2, v3, 2.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c] +v_pk_maximum3_f16 v1, v2, 2.0, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c] +v_pk_maximum3_f16 v1, 2.0, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c] +v_pk_maximum3_f16 v1, v2, v3, v4 clamp + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s8, 
v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s8, v1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18] +v_pk_maximum3_f16 v8, v0, v1, s8 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04] +v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s b/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s new file mode 100644 index 0000000000000..93d015f790c86 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940-ERR --implicit-check-not=error: %s + +ds_read_b64_tr_b4 v[0:1], v1 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this 
GPU +// GFX950: encoding: [0x00,0x00,0xc0,0xd9,0x01,0x00,0x00,0x00] + +ds_read_b64_tr_b4 v[2:3], v3 offset:64 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x40,0x00,0xc0,0xd9,0x03,0x00,0x00,0x02] + +ds_read_b64_tr_b8 v[0:1], v1 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x00,0x00,0xc4,0xd9,0x01,0x00,0x00,0x00] + +ds_read_b64_tr_b8 v[2:3], v3 offset:64 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x40,0x00,0xc4,0xd9,0x03,0x00,0x00,0x02] + +ds_read_b64_tr_b16 v[0:1], v1 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x00,0x00,0xc6,0xd9,0x01,0x00,0x00,0x00] + +ds_read_b64_tr_b16 v[2:3], v3 offset:64 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x40,0x00,0xc6,0xd9,0x03,0x00,0x00,0x02] + +ds_read_b96_tr_b6 v[0:2], v0 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x00] + +ds_read_b96_tr_b6 v[2:4], v2 offset:64 +// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX950: encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02] diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s index c9980f420b955..5f5e505711705 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s @@ -1,26 +1,148 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX906-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 
-show-encoding %s 2>&1 | FileCheck -check-prefix=GFX940-ERR %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding < %s | FileCheck --check-prefix=GFX950 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s v_cvt_pk_bf16_f32 v5, v1, v2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x02,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, v255, v255 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd2,0xff,0xff,0x03,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, v1, s2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x00,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, m0, 0.5 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd2,0x7c,0xe0,0x01,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, -1, exec_hi +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this 
GPU // GFX950: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd2,0xc1,0xfe,0x00,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: error: instruction not supported on this GPU + +v_bitop3_b32 v5, v1, v2, s3 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00] +// GFX12-ERR: error: instruction not supported on this GPU + +v_bitop3_b32 v5, v1, v2, s3 bitop3:161 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30] +// GFX12-ERR: error: instruction not supported on this GPU + +v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1] +// GFX12-ERR: error: instruction not supported on this GPU + +v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab] +// GFX12-ERR: error: instruction not supported on this GPU + +v_bitop3_b16 v5, v1, v2, s3 +// GFX906-ERR: 
error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00] +// GFX12-ERR: error: instruction not supported on this GPU + +v_bitop3_b16 v5, v1, v2, s3 bitop3:161 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_i8_i32 v2, s4, v7, v8 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x0e,0x22,0x04] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_i8_i32 v2, v4, 0, 1 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x01,0x05,0x02] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_i8_i32 v2, v4, 3, s2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x07,0x09,0x00] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_i8_i32 v2, s4, 4, v2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x08,0x09,0x04] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_i8_i32 v2, v4, v7, 0.5 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU 
+// GFX950: v_ashr_pk_i8_i32 v2, v4, v7, 0.5 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x0f,0xc2,0x03] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x65,0xd2,0x02,0x07,0x12,0x04] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_u8_i32 v2, s4, v7, v8 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x0e,0x22,0x04] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_u8_i32 v2, v4, 0, 1 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x01,0x05,0x02] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_u8_i32 v2, v4, 3, s2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x07,0x09,0x00] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_u8_i32 v2, s4, 4, v2 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x08,0x09,0x04] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_u8_i32 v2, v4, v7, -2.0 +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_u8_i32 v2, v4, v7, -2.0 ; encoding: 
[0x02,0x00,0x66,0xd2,0x04,0x0f,0xd6,0x03] +// GFX12-ERR: error: instruction not supported on this GPU + +v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] +// GFX906-ERR: error: instruction not supported on this GPU +// GFX940-ERR: error: instruction not supported on this GPU +// GFX950: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x66,0xd2,0x02,0x07,0x12,0x04] +// GFX12-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx950_dlops.s b/llvm/test/MC/AMDGPU/gfx950_dlops.s new file mode 100644 index 0000000000000..4ae60ac785f49 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_dlops.s @@ -0,0 +1,61 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s + +v_dot2_f32_bf16 v5, v1, v2, v3 +// GFX950: v_dot2_f32_bf16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x9a,0xd3,0x01,0x05,0x0e,0x1c] + +v_dot2_f32_bf16 v5, v1, v2, s3 +// GFX950: v_dot2_f32_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x40,0x9a,0xd3,0x01,0x05,0x0e,0x18] + +v_dot2_f32_bf16 v2, v1, 0, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, 0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0x01,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 0.5, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, 0.5, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe1,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -0.5, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, -0.5, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe3,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 1.0, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, 1.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe5,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -1.0, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, -1.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe7,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 2.0, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, 2.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe9,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -2.0, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, -2.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xeb,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 4.0, v2 +// 
GFX950: v_dot2_f32_bf16 v2, v1, 4.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xed,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, -4.0, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, -4.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xef,0x09,0x1c] + +v_dot2_f32_bf16 v2, v1, 0.15915494, v2 +// GFX950: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xf1,0x09,0x1c] + +v_dot2_f32_bf16 v2, 0.5, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, 0.5, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf0,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -0.5, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, -0.5, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf1,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 1.0, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, 1.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf2,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -1.0, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, -1.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf3,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 2.0, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, 2.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf4,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -2.0, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, -2.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf5,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, 4.0, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, 4.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf6,0x02,0x0a,0x1c] + +v_dot2_f32_bf16 v2, -4.0, v1, v2 +// GFX950: v_dot2_f32_bf16 v2, -4.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf7,0x02,0x0a,0x1c] diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s new file mode 100644 index 0000000000000..c5450e48558bf --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -0,0 +1,394 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 --implicit-check-not=error: %s + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, s0 + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, m0 + +// GFX950: 
:[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, vcc + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, vcc_lo + +// GFX950: :[[@LINE+1]]:23: error: invalid operand for instruction +v_permlane16_swap_b32 s0, v0 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:0 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 fi:1 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 fi:0 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 fi:1 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf8_f16 
v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 div:2 + 
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 div:2 + +// GFX950: 
:[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_f16_fp6 v[10:25], v[20:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand 
+v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 mul:2 + +// 
GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 
mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: 
error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 slc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_maximum3_f16 v0, v1, v2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_minimum3_f16 v0, v1, v2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_maximum_f16 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_minimum_f16 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_maximum_f32 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_minimum_f32 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_maximum3_f32 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_maximum3_f32 v0, v3, s1, s2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_maximum3_f32 v0, s1, v3, s2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_minimum3_f32 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported +v_minimum3_f32 v0, v1, v2, 0xdeadbeef + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_pk_minimum3_f16 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_pk_maximum3_f16 v0, s1, s2, v3 diff --git 
a/llvm/test/MC/AMDGPU/gfx950_xdlops.s b/llvm/test/MC/AMDGPU/gfx950_xdlops.s new file mode 100644 index 0000000000000..2ca131c9c0bf4 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_xdlops.s @@ -0,0 +1,133 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck %s + +// CHECK: encoding: [0x01,0x05,0x0a,0x2c] +v_dot2c_f32_bf16 v5, v1, v2 + +// CHECK: encoding: [0x01,0x05,0xfe,0x2d] +v_dot2c_f32_bf16 v255, v1, v2 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0xfe,0x2d,0x01,0xe4,0x00,0x00] +v_dot2c_f32_bf16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0xff,0xe4,0x00,0x00] +v_dot2c_f32_bf16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0xfe,0x0b,0x2c,0x01,0xe4,0x00,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x42,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_bcast:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x43,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_bcast:31 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x30,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 wave_shl:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x34,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 wave_rol:1 row_mask:0x0 bank_mask:0x0 + +// 
CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x38,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 wave_shr:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x3c,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 wave_ror:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x10] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x30] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xf0] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xf0] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x01] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x03] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 + +// CHECK: encoding: 
[0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x0f] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x0f] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x08,0x00] +v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x10,0x00] +v_dot2c_f32_bf16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x20,0x00] +v_dot2c_f32_bf16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x40,0x00] +v_dot2c_f32_bf16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x80,0x00] +v_dot2c_f32_bf16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0x05,0x00,0x16,0xd1,0x01,0xfb,0x01,0x00] +v_dot2c_f32_bf16_e64 v5, v1, src_scc + +// CHECK: encoding: [0x05,0x00,0x16,0xd1,0xff,0xf9,0x01,0x00] +v_dot2c_f32_bf16_e64 v5, v255, src_execz + +// CHECK: encoding: [0x05,0x00,0x16,0xd1,0x65,0xca,0x00,0x00] +v_dot2c_f32_bf16_e64 v5, s101, s101 + +// CHECK: encoding: [0x05,0x00,0x16,0xd1,0xc1,0xcc,0x00,0x00] +v_dot2c_f32_bf16_e64 v5, -1, flat_scratch_lo + +// CHECK: encoding: [0x05,0x02,0x16,0xd1,0xf0,0xce,0x00,0x40] +v_dot2c_f32_bf16_e64 v5, 0.5, -|flat_scratch_hi| + +// CHECK: encoding: [0x05,0x00,0x16,0xd1,0xfc,0xe0,0x01,0x10] +v_dot2c_f32_bf16_e64 v5, src_execz, 0.5 mul:4 + +// CHECK: encoding: [0xff,0x81,0x16,0xd1,0xfd,0x82,0x01,0x38] +v_dot2c_f32_bf16_e64 v255, -|src_scc|, -1 clamp div:2 + +// CHECK: encoding: [0x8a,0x04,0x0a,0x2c] +v_dot2c_f32_bf16_e32 v5, 10, v2 ; encoding: [0x8a,0x04,0x0a,0x2c] + +// CHECK: encoding: [0xff,0x04,0x0a,0x2c,0x64,0x00,0x00,0x00] +v_dot2c_f32_bf16_e32 v5, 100, v2 ; 
encoding: [0xff,0x04,0x0a,0x2c,0x64,0x00,0x00,0x00] + +// CHECK: encoding: [0xff,0x04,0x0a,0x2c,0x22,0x41,0x00,0x00] +v_dot2c_f32_bf16_e32 v5, 10.1, v2 ; encoding: [0xff,0x04,0x0a,0x2c,0x22,0x41,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s index a5cca6ba5bd93..79ab8666234a2 100644 --- a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s +++ b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple amdgcn < %s 2>&1 | FileCheck --strict-whitespace %s +# RUN: not llvm-mc -triple amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck --strict-whitespace %s # This tests the mnemonic spell checker. diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index 7b3bd5ece0988..783947544d221 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -1,10 +1,8 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 
--implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/mimg-err.s b/llvm/test/MC/AMDGPU/mimg-err.s index 6cf92f29c27b7..bec33bab984ab 100644 --- a/llvm/test/MC/AMDGPU/mimg-err.s +++ b/llvm/test/MC/AMDGPU/mimg-err.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s index 29e402d9496f1..54bb2b19b2e84 100644 --- a/llvm/test/MC/AMDGPU/mimg.s +++ b/llvm/test/MC/AMDGPU/mimg.s @@ -1,11 +1,9 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_0 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s 
--check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/regression/bug28165.s b/llvm/test/MC/AMDGPU/regression/bug28165.s index 1e31f204e8995..6d04e13316b61 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28165.s +++ b/llvm/test/MC/AMDGPU/regression/bug28165.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI diff --git a/llvm/test/MC/AMDGPU/regression/bug28413.s b/llvm/test/MC/AMDGPU/regression/bug28413.s index 5fbf9f37d4a8d..7cf413d2d0a17 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28413.s +++ b/llvm/test/MC/AMDGPU/regression/bug28413.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index b877bce22af56..12e01321b967a 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN %s // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN %s // 
RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck --check-prefixes=GCN,CI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=VI %s -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s index c912b83ca61c2..59c93fefcfaa2 100644 --- a/llvm/test/MC/AMDGPU/sopk.s +++ b/llvm/test/MC/AMDGPU/sopk.s @@ -1,11 +1,9 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,VI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,GFX9 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GCN,GFX10 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX11 %s -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s diff --git a/llvm/test/MC/AMDGPU/unknown-target-cpu.s b/llvm/test/MC/AMDGPU/unknown-target-cpu.s new file mode 100644 index 0000000000000..3d41e8eb5b2c4 --- 
/dev/null +++ b/llvm/test/MC/AMDGPU/unknown-target-cpu.s @@ -0,0 +1,15 @@ +// RUN: not llvm-mc -triple=amdgcn -show-encoding < %s | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn -show-encoding -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s +// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding < %s | FileCheck %s + +// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c] +v_cmp_lt_f32 vcc, s2, v4 + +// CHECK: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00] +v_cndmask_b32 v1, v2, v3, vcc + +// ERR: [[@LINE+1]]:1: error: instruction not supported on this GPU +v_mac_legacy_f32 v1, v3, s5 + +// CHECK: v_lshr_b32_e32 v0, v1, v2 ; encoding: [0x01,0x05,0x00,0x2a] +v_lshr_b32 v0, v1, v2 diff --git a/llvm/test/MC/AMDGPU/vintrp.s b/llvm/test/MC/AMDGPU/vintrp.s index db15f8eb4499d..35720c95cf31e 100644 --- a/llvm/test/MC/AMDGPU/vintrp.s +++ b/llvm/test/MC/AMDGPU/vintrp.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck -check-prefix=SI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SI %s // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s v_interp_p1_f32 v1, v0, attr0.x diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index f7e5db7fa3d39..af0d289e827ee 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,CI,SICI,CIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,CIVI,VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not 
llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index ade7ce95f1758..7317ab00ad782 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop3-convert.s b/llvm/test/MC/AMDGPU/vop3-convert.s index 0f33a81c6ea0f..02d576fdcd845 100644 --- a/llvm/test/MC/AMDGPU/vop3-convert.s +++ b/llvm/test/MC/AMDGPU/vop3-convert.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s 
--check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s index e600151410389..94fc0ea8b3e9e 100644 --- a/llvm/test/MC/AMDGPU/vop3-errs.s +++ b/llvm/test/MC/AMDGPU/vop3-errs.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index 0d2544002a9f2..ccae2611d4ffd 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -1,11 +1,11 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI // Make sure interp instructions 
disassemble regardless of lds bank count // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck %s -check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s index a15a48e507a62..c7cfb7ae67a97 100644 --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -1,7 +1,6 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,VI9 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,VI9 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOSICI,NOCI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s index 0c803a9819a83..0e007d5e360a3 100644 --- a/llvm/test/MC/AMDGPU/vop_sdwa.s +++ b/llvm/test/MC/AMDGPU/vop_sdwa.s @@ -1,7 +1,6 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s 
--check-prefixes=VI,GFX89 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,GFX89 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOCI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOVI,NOGFX89 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vopc.s b/llvm/test/MC/AMDGPU/vopc.s index 55289c0a463fa..9ff4f7eda73a0 100644 --- a/llvm/test/MC/AMDGPU/vopc.s +++ b/llvm/test/MC/AMDGPU/vopc.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s index 825a0abc17224..27502eff89bfc 100644 --- a/llvm/test/MC/AMDGPU/wave_any.s +++ b/llvm/test/MC/AMDGPU/wave_any.s @@ -1,13 +1,13 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s v_cmp_ge_i32_e32 s0, v0 -// GFX10: v_cmp_ge_i32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d] +// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d] v_cmp_ge_i32_e32 vcc_lo, s0, v1 -// GFX10: v_cmp_ge_i32_e32 vcc, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d] +// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d] v_cmp_ge_i32_e32 vcc, s0, v2 -// GFX10: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d] +// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d] 
v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD // GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06] @@ -16,10 +16,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD // GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06] v_cmp_class_f32_e32 vcc_lo, s0, v0 -// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] +// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] v_cmp_class_f32_e32 vcc, s0, v0 -// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] +// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD // GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06] @@ -34,40 +34,40 @@ v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD // GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06] v_cndmask_b32_e32 v1, v2, v3, -// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] v_cndmask_b32_e32 v1, v2, v3, vcc_lo -// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] v_cndmask_b32_e32 v1, v2, v3, vcc -// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo -// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] +// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] 
v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] +// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] v_add_co_ci_u32_e32 v3, v3, v4 -// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] +// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo -// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] +// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] +// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] v_sub_co_ci_u32_e32 v3, v3, v4 -// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] +// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] +// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc -// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] +// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] v_subrev_co_ci_u32_e32 v1, 0, v1 -// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] +// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; 
encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] @@ -76,7 +76,7 @@ v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr // GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] +// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] @@ -85,7 +85,7 @@ v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr // GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] +// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] @@ -94,10 +94,10 @@ v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] +// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] +// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] @@ -106,7 +106,7 @@ v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:U // GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 
bank_mask:0x0 -// GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] +// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] @@ -189,8 +189,8 @@ v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 // GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] -v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] -// GFX10: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] +v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] +// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo // GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt index ce37e228f03fa..9fc9c58387b90 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt @@ -42,3 +42,80 @@ # GFX950: buffer_load_dwordx4 v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65] 0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65 + + +# GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e] +0x02,0xb3,0x02,0x7e + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: 
[0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x08,0x99,0xd1,0x02,0x01,0x00,0x00 + + +# GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e] +0x02,0xb5,0x02,0x7e + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03] +0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 
offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c + +# GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03] +0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03] +0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_ds_read_tr.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_ds_read_tr.txt new file mode 100644 index 0000000000000..1efd2d7b996d4 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_ds_read_tr.txt @@ -0,0 +1,37 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -check-prefix=GFX950 %s + +# 
GFX950: ds_read_b64_tr_b4 v[0:1], v0 ; encoding: [0x00,0x00,0xc0,0xd9,0x00,0x00,0x00,0x00] +0x00,0x00,0xc0,0xd9,0x00,0x00,0x00,0x00 + +# GFX950: ds_read_b64_tr_b4 v[2:3], v2 ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x00,0x00,0x02] +0x00,0x00,0xc0,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b64_tr_b4 v[2:3], v2 offset:64 ; encoding: [0x40,0x00,0xc0,0xd9,0x02,0x00,0x00,0x02] +0x40,0x00,0xc0,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b64_tr_b8 v[0:1], v0 ; encoding: [0x00,0x00,0xc4,0xd9,0x00,0x00,0x00,0x00] +0x00,0x00,0xc4,0xd9,0x00,0x00,0x00,0x00 + +# GFX950: ds_read_b64_tr_b8 v[2:3], v2 ; encoding: [0x00,0x00,0xc4,0xd9,0x02,0x00,0x00,0x02] +0x00,0x00,0xc4,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b64_tr_b8 v[2:3], v2 offset:64 ; encoding: [0x40,0x00,0xc4,0xd9,0x02,0x00,0x00,0x02] +0x40,0x00,0xc4,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b64_tr_b16 v[0:1], v0 ; encoding: [0x00,0x00,0xc6,0xd9,0x00,0x00,0x00,0x00] +0x00,0x00,0xc6,0xd9,0x00,0x00,0x00,0x00 + +# GFX950: ds_read_b64_tr_b16 v[2:3], v2 ; encoding: [0x00,0x00,0xc6,0xd9,0x02,0x00,0x00,0x02] +0x00,0x00,0xc6,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b64_tr_b16 v[2:3], v2 offset:64 ; encoding: [0x40,0x00,0xc6,0xd9,0x02,0x00,0x00,0x02] +0x40,0x00,0xc6,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b96_tr_b6 v[0:2], v0 ; encoding: [0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x00] +0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x00 + +# GFX950: ds_read_b96_tr_b6 v[2:4], v2 ; encoding: [0x00,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02] +0x00,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02 + +# GFX950: ds_read_b96_tr_b6 v[2:4], v2 offset:64 ; encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02] +0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index 909743c2babf5..adb4f78942503 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -17,3 +17,928 @@ # GFX950: 
v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08] 0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08 + +# GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00] +0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00 + +# GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30] +0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30 + +# GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1] +0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1 + +# GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab] +0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab + +# GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00] +0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00 + +# GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30] +0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x48,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: 
[0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x50,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x58,0x4a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00] +0x01,0x00,0x4a,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00] +0x01,0x08,0x4a,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00] +0x01,0x10,0x4a,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00] +0x01,0x18,0x4a,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00] +0x01,0x40,0x4a,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00] +0x01,0x48,0x4a,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00] +0x01,0x50,0x4a,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00] +0x01,0x58,0x4a,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x00,0x4a,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00] +0x01,0x08,0x4a,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x10,0x4a,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,0] 
; encoding: [0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00] +0x01,0x18,0x4a,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x50,0x4a,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00] +0x01,0x48,0x4a,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x50,0x4a,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00] +0x01,0x58,0x4a,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x48,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x50,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x58,0x3b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 
v1, s1, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00] +0x01,0x00,0x3b,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00] +0x01,0x08,0x3b,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00] +0x01,0x10,0x3b,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00] +0x01,0x18,0x3b,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00] +0x01,0x40,0x3b,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00] +0x01,0x48,0x3b,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00] +0x01,0x50,0x3b,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00] +0x01,0x58,0x3b,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 ; encoding: [0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x00,0x3b,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00] +0x01,0x08,0x3b,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x10,0x3b,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00] +0x01,0x18,0x3b,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x40,0x3b,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: 
v_cvt_scalef32_f32_fp8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00] +0x01,0x48,0x3b,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x50,0x3b,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00] +0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x48,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x50,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x02,0x07,0x02,0x00] +0x01,0x58,0x4b,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3 ; encoding: [0x01,0x00,0x4b,0xd2,0x01,0x06,0x02,0x00] +0x01,0x00,0x4b,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x06,0x02,0x00] +0x01,0x08,0x4b,0xd2,0x02,0x06,0x02,0x00 + 
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x03,0x06,0x02,0x00] +0x01,0x10,0x4b,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x04,0x06,0x02,0x00] +0x01,0x18,0x4b,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x01,0x06,0x02,0x00] +0x01,0x40,0x4b,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x06,0x02,0x00] +0x01,0x48,0x4b,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x03,0x06,0x02,0x00] +0x01,0x50,0x4b,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x04,0x06,0x02,0x00] +0x01,0x58,0x4b,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3 ; encoding: [0x01,0x00,0x4b,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x00,0x4b,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x96,0x06,0x02,0x00] +0x01,0x08,0x4b,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x10,0x4b,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0xac,0x06,0x02,0x00] +0x01,0x18,0x4b,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x50,0x4b,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x96,0x06,0x02,0x00] +0x01,0x48,0x4b,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0xa1,0x06,0x02,0x00] 
+0x01,0x50,0x4b,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0xac,0x06,0x02,0x00] +0x01,0x58,0x4b,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x48,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x50,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x58,0x3c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x01,0x06,0x02,0x00] +0x01,0x00,0x3c,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x06,0x02,0x00] +0x01,0x08,0x3c,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x03,0x06,0x02,0x00] +0x01,0x10,0x3c,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: 
[0x01,0x18,0x3c,0xd2,0x04,0x06,0x02,0x00] +0x01,0x18,0x3c,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x01,0x06,0x02,0x00] +0x01,0x40,0x3c,0xd2,0x01,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x06,0x02,0x00] +0x01,0x48,0x3c,0xd2,0x02,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x03,0x06,0x02,0x00] +0x01,0x50,0x3c,0xd2,0x03,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x04,0x06,0x02,0x00] +0x01,0x58,0x3c,0xd2,0x04,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x00,0x3c,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x96,0x06,0x02,0x00] +0x01,0x08,0x3c,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x10,0x3c,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0xac,0x06,0x02,0x00] +0x01,0x18,0x3c,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x8b,0x06,0x02,0x00] +0x01,0x40,0x3c,0xd2,0x8b,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x96,0x06,0x02,0x00] +0x01,0x48,0x3c,0xd2,0x96,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0xa1,0x06,0x02,0x00] +0x01,0x50,0x3c,0xd2,0xa1,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00] +0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, 
v2, v3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00] +0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00 
+ +# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00] +0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00] +0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00] +0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00] +0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00] +0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00] +0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00] +0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00] +0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00] +0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00] +0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00] +0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20] 
+0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20] +0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00] +0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20] +0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20] +0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00] +0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20] +0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00] 
+0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20] +0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00] +0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20] +0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20] +0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00] +0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: 
[0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04] 
+0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: 
v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00] 
+0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00] +0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00] +0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_bf16_bf6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00] +0x0a,0x00,0x63,0xd2,0x14,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_f16_bf6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x62,0xd2,0x14,0x11,0x02,0x00] +0x0a,0x00,0x62,0xd2,0x14,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_bf16_fp6 v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x61,0xd2,0x14,0x11,0x02,0x00] +0x0a,0x00,0x61,0xd2,0x14,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_f16_fp6 
v[10:25], v[20:25], v8 ; encoding: [0x0a,0x00,0x60,0xd2,0x14,0x11,0x02,0x00] +0x0a,0x00,0x60,0xd2,0x14,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_bf6_bf16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x5b,0xd2,0x0a,0x11,0x02,0x00] +0x14,0x00,0x5b,0xd2,0x0a,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_bf6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x5a,0xd2,0x0a,0x11,0x02,0x00] +0x14,0x00,0x5a,0xd2,0x0a,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x59,0xd2,0x0a,0x11,0x02,0x00] +0x14,0x00,0x59,0xd2,0x0a,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00] +0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_bf8 
v1, s2, 3 ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 
op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20] +0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00] +0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20] +0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20 + +# GFX950: v_ashr_pk_i8_i32 v2, s4, 4, 
v2 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x08,0x09,0x04] +0x02,0x00,0x65,0xd2,0x04,0x08,0x09,0x04 + +# GFX950: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x0e,0x22,0x04] +0x02,0x00,0x65,0xd2,0x04,0x0e,0x22,0x04 + +# GFX950: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x01,0x05,0x02] +0x02,0x00,0x65,0xd2,0x04,0x01,0x05,0x02 + +# GFX950: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x07,0x09,0x00] +0x02,0x00,0x65,0xd2,0x04,0x07,0x09,0x00 + +# GFX950: v_ashr_pk_i8_i32 v2, v4, v7, 0.5 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x0f,0xc2,0x03] +0x02,0x00,0x65,0xd2,0x04,0x0f,0xc2,0x03 + +# GFX950: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x65,0xd2,0x02,0x07,0x12,0x04] +0x01,0x40,0x65,0xd2,0x02,0x07,0x12,0x04 + +# GFX950: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x08,0x09,0x04] +0x02,0x00,0x66,0xd2,0x04,0x08,0x09,0x04 + +# GFX950: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x0e,0x22,0x04] +0x02,0x00,0x66,0xd2,0x04,0x0e,0x22,0x04 + +# GFX950: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x01,0x05,0x02] +0x02,0x00,0x66,0xd2,0x04,0x01,0x05,0x02 + +# GFX950: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x07,0x09,0x00] +0x02,0x00,0x66,0xd2,0x04,0x07,0x09,0x00 + +# GFX950: v_ashr_pk_u8_i32 v2, v4, v7, -2.0 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x0f,0xd6,0x03] +0x02,0x00,0x66,0xd2,0x04,0x0f,0xd6,0x03 + +# GFX950: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x66,0xd2,0x02,0x07,0x12,0x04] +0x01,0x40,0x66,0xd2,0x02,0x07,0x12,0x04 + +# GFX950: v_dot2_f32_bf16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x9a,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x40,0x9a,0xd3,0x01,0x05,0x0e,0x1c + +# GFX950: v_dot2_f32_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x40,0x9a,0xd3,0x01,0x05,0x0e,0x18] +0x05,0x40,0x9a,0xd3,0x01,0x05,0x0e,0x18 + +# GFX950: v_dot2_f32_bf16 v2, v1, 0, v2 ; 
encoding: [0x02,0x40,0x9a,0xd3,0x01,0x01,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0x01,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, 0.5, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe1,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xe1,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, -0.5, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe3,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xe3,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, 1.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe5,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xe5,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, -1.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe7,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xe7,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, 2.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xe9,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xe9,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, -2.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xeb,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xeb,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, 4.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xed,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xed,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, -4.0, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xef,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xef,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, v1, 0.15915494, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0x01,0xf1,0x09,0x1c] +0x02,0x40,0x9a,0xd3,0x01,0xf1,0x09,0x1c + +# GFX950: v_dot2_f32_bf16 v2, 0.5, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf0,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf0,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, -0.5, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf1,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf1,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, 1.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf2,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf2,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, -1.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf3,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf3,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, 2.0, v1, v2 ; encoding: 
[0x02,0x40,0x9a,0xd3,0xf4,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf4,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, -2.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf5,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf5,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, 4.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf6,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf6,0x02,0x0a,0x1c + +# GFX950: v_dot2_f32_bf16 v2, -4.0, v1, v2 ; encoding: [0x02,0x40,0x9a,0xd3,0xf7,0x02,0x0a,0x1c] +0x02,0x40,0x9a,0xd3,0xf7,0x02,0x0a,0x1c + +# GFX950: v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], v6 ; encoding: [0x14,0x00,0x52,0xd2,0x0a,0x15,0x1a,0x04] +0x14,0x00,0x52,0xd2,0x0a,0x15,0x1a,0x04 + +# GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x1a,0x04] +0x14,0x00,0x53,0xd2,0x0a,0x15,0x1a,0x04 + +# GFX950: v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], s6 ; encoding: [0x14,0x00,0x52,0xd2,0x0a,0x15,0x1a,0x00] +0x14,0x00,0x52,0xd2,0x0a,0x15,0x1a,0x00 + +# GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], s6 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x1a,0x00] +0x14,0x00,0x53,0xd2,0x0a,0x15,0x1a,0x00 + +# GFX950: v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], 22 ; encoding: [0x14,0x00,0x52,0xd2,0x0a,0x15,0x5a,0x02] +0x14,0x00,0x52,0xd2,0x0a,0x15,0x5a,0x02 + +# GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02] +0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02 + +# GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4] +0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4 + +# GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4] +0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4 + +# GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04] +0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04 + +# GFX950: v_maximum3_f32 v1, s8, v3, 
1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03] +0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03 + +# GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04] +0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04 + +# GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04] +0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04 + +# GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03] +0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03 + +# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04] +0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04 + + +# GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c] +0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c + +# GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c] +0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c + +# GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b] +0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b + +# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c] +0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c] +0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c] +0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c + +# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04] +0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04] +0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c] +0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c + +# GFX950: 
v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18] +0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18 + +# GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c] +0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c + +# GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c] +0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c + +# GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b] +0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b + +# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c] +0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c] +0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c + +# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c] +0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c + +# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04] +0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04] +0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04 + +# GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c] +0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c + +# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18] +0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_xdlops.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_xdlops.txt new file mode 100644 index 0000000000000..53b0bcb0aa1ae --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_xdlops.txt @@ -0,0 +1,133 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX950 %s + +# GFX950: 
v_dot2c_f32_bf16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] +0x01,0x05,0x0a,0x2c + +# GFX950: v_dot2c_f32_bf16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x2d] +0x01,0x05,0xfe,0x2d + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x2d,0x01,0xe4,0x00,0x00] +0xfa,0x04,0xfe,0x2d,0x01,0xe4,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0xff,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x2c,0xff,0xe4,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x2c,0x01,0xe4,0x00,0x00] +0xfa,0xfe,0x0b,0x2c,0x01,0xe4,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_bcast:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x42,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x42,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_bcast:31 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x43,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x43,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 wave_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x30,0x01,0x00] 
+0xfa,0x04,0x0a,0x2c,0x01,0x30,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 wave_rol:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x34,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x34,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 wave_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x38,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x38,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 wave_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x3c,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x3c,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x10] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x10 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x30] 
+0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x30 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xf0 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xf0 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x01] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x01 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x03] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x03 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x0f + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0x0f + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x08,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x08,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x10,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x10,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x20,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x20,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x40,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x40,0x00 + +# GFX950: v_dot2c_f32_bf16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] 
row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x80,0x00] +0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x80,0x00 + +# GFX950: v_dot2c_f32_bf16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x16,0xd1,0x01,0xfb,0x01,0x00] +0x05,0x00,0x16,0xd1,0x01,0xfb,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x16,0xd1,0xff,0xf9,0x01,0x00] +0x05,0x00,0x16,0xd1,0xff,0xf9,0x01,0x00 + +# GFX950: v_dot2c_f32_bf16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x16,0xd1,0x65,0xca,0x00,0x00] +0x05,0x00,0x16,0xd1,0x65,0xca,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x16,0xd1,0xc1,0xcc,0x00,0x00] +0x05,0x00,0x16,0xd1,0xc1,0xcc,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_e64 v5, 0.5, -|flat_scratch_hi| ; encoding: [0x05,0x02,0x16,0xd1,0xf0,0xce,0x00,0x40] +0x05,0x02,0x16,0xd1,0xf0,0xce,0x00,0x40 + +# GFX950: v_dot2c_f32_bf16_e64 v5, src_execz, 0.5 mul:4 ; encoding: [0x05,0x00,0x16,0xd1,0xfc,0xe0,0x01,0x10] +0x05,0x00,0x16,0xd1,0xfc,0xe0,0x01,0x10 + +# GFX950: v_dot2c_f32_bf16_e64 v255, -|src_scc|, -1 clamp div:2 ; encoding: [0xff,0x81,0x16,0xd1,0xfd,0x82,0x01,0x38] +0xff,0x81,0x16,0xd1,0xfd,0x82,0x01,0x38 + +# GFX950: v_dot2c_f32_bf16_e32 v5, 10, v2 ; encoding: [0x8a,0x04,0x0a,0x2c] +0x8a,0x04,0x0a,0x2c + +# GFX950: v_dot2c_f32_bf16_e32 v5, 0x64, v2 ; encoding: [0xff,0x04,0x0a,0x2c,0x64,0x00,0x00,0x00] +0xff,0x04,0x0a,0x2c,0x64,0x00,0x00,0x00 + +# GFX950: v_dot2c_f32_bf16_e32 v5, 0x4122, v2 ; encoding: [0xff,0x04,0x0a,0x2c,0x22,0x41,0x00,0x00] +0xff,0x04,0x0a,0x2c,0x22,0x41,0x00,0x00 diff --git a/llvm/test/MC/ELF/relocation.s b/llvm/test/MC/ELF/relocation.s index 88301f8447bc2..25497a003f853 100644 --- a/llvm/test/MC/ELF/relocation.s +++ b/llvm/test/MC/ELF/relocation.s @@ -21,6 +21,15 @@ bar: leaq foo@GOTTPOFF(%rip), %rax # R_X86_64_GOTTPOFF movq foo@GOTTPOFF(%rip), %r31 # R_X86_64_CODE_4_GOTTPOFF addq foo@GOTTPOFF(%rip), %r31 # R_X86_64_CODE_4_GOTTPOFF + # NDD + addq %r8, foo@GOTTPOFF(%rip), %r16 # 
R_X86_64_CODE_6_GOTTPOFF + addq foo@GOTTPOFF(%rip), %rax, %r12 # R_X86_64_CODE_6_GOTTPOFF + # NDD + NF + {nf} addq %r8, foo@GOTTPOFF(%rip), %r16 # R_X86_64_CODE_6_GOTTPOFF + {nf} addq foo@GOTTPOFF(%rip), %rax, %r12 # R_X86_64_CODE_6_GOTTPOFF + # NF + {nf} addq foo@GOTTPOFF(%rip), %r12 # R_X86_64_CODE_6_GOTTPOFF + leaq foo@TLSGD(%rip), %rax # R_X86_64_TLSGD leaq foo@TPOFF(%rax), %rax # R_X86_64_TPOFF32 leaq foo@TLSLD(%rip), %rdi # R_X86_64_TLSLD @@ -81,37 +90,42 @@ weak_sym: // CHECK-NEXT: 0x2D R_X86_64_GOTTPOFF foo 0xFFFFFFFFFFFFFFFC // CHECK-NEXT: 0x35 R_X86_64_CODE_4_GOTTPOFF foo 0xFFFFFFFFFFFFFFFC // CHECK-NEXT: 0x3D R_X86_64_CODE_4_GOTTPOFF foo 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0x44 R_X86_64_TLSGD foo 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0x4B R_X86_64_TPOFF32 foo 0x0 -// CHECK-NEXT: 0x52 R_X86_64_TLSLD foo 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0x59 R_X86_64_DTPOFF32 foo 0x0 -// CHECK-NEXT: 0x5F R_X86_64_GOT64 foo 0x0 -// CHECK-NEXT: 0x69 R_X86_64_GOTOFF64 foo 0x0 -// CHECK-NEXT: 0x72 R_X86_64_32S .text 0x0 -// CHECK-NEXT: 0x79 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0x80 R_X86_64_PC32 foo 0x80 -// CHECK-NEXT: 0x87 R_X86_64_32S .text 0x0 -// CHECK-NEXT: 0x8B R_X86_64_DTPOFF64 foo 0x0 -// CHECK-NEXT: 0x95 R_X86_64_TPOFF64 baz 0x0 -// CHECK-NEXT: 0x9D R_X86_64_PC16 foo 0x9D -// CHECK-NEXT: 0x9F R_X86_64_PC8 foo 0x9F -// CHECK-NEXT: 0xA1 R_X86_64_PLT32 foo 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0xA8 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFB -// CHECK-NEXT: 0xAF R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0x3 -// CHECK-NEXT: 0xB6 R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0xBB R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0x1 -// CHECK-NEXT: 0xC1 R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_ 0x2 -// CHECK-NEXT: 0xC9 R_X86_64_SIZE64 blah 0x0 -// CHECK-NEXT: 0xD1 R_X86_64_SIZE64 blah 0x20 -// CHECK-NEXT: 0xD9 R_X86_64_SIZE64 blah 0xFFFFFFFFFFFFFFE0 -// CHECK-NEXT: 0xE4 R_X86_64_SIZE32 blah 0x0 -// CHECK-NEXT: 0xEB R_X86_64_SIZE32 blah 0x20 -// CHECK-NEXT: 
0xF2 R_X86_64_SIZE32 blah 0xFFFFFFFFFFFFFFE0 -// CHECK-NEXT: 0xF6 R_X86_64_GOTPCREL foo 0x0 -// CHECK-NEXT: 0xFA R_X86_64_PLT32 foo 0x0 -// CHECK-NEXT: 0x10E R_X86_64_32 .text 0x10E -// CHECK-NEXT: 0x113 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE -// CHECK-NEXT: 0x115 R_X86_64_PC32 pr23272 0x0 +// CHECK-NEXT: 0x47 R_X86_64_CODE_6_GOTTPOFF foo 0xFFFFFFFFFFFFFFFA +// CHECK-NEXT: 0x51 R_X86_64_CODE_6_GOTTPOFF foo 0xFFFFFFFFFFFFFFFA +// CHECK-NEXT: 0x5B R_X86_64_CODE_6_GOTTPOFF foo 0xFFFFFFFFFFFFFFFA +// CHECK-NEXT: 0x65 R_X86_64_CODE_6_GOTTPOFF foo 0xFFFFFFFFFFFFFFFA +// CHECK-NEXT: 0x6F R_X86_64_CODE_6_GOTTPOFF foo 0xFFFFFFFFFFFFFFFA +// CHECK-NEXT: 0x76 R_X86_64_TLSGD foo 0xFFFFFFFFFFFFFFFC +// CHECK-NEXT: 0x7D R_X86_64_TPOFF32 foo 0x0 +// CHECK-NEXT: 0x84 R_X86_64_TLSLD foo 0xFFFFFFFFFFFFFFFC +// CHECK-NEXT: 0x8B R_X86_64_DTPOFF32 foo 0x0 +// CHECK-NEXT: 0x91 R_X86_64_GOT64 foo 0x0 +// CHECK-NEXT: 0x9B R_X86_64_GOTOFF64 foo 0x0 +// CHECK-NEXT: 0xA4 R_X86_64_32S .text 0x0 +// CHECK-NEXT: 0xAB R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC +// CHECK-NEXT: 0xB2 R_X86_64_PC32 foo 0xB2 +// CHECK-NEXT: 0xB9 R_X86_64_32S .text 0x0 +// CHECK-NEXT: 0xBD R_X86_64_DTPOFF64 foo 0x0 +// CHECK-NEXT: 0xC7 R_X86_64_TPOFF64 baz 0x0 +// CHECK-NEXT: 0xCF R_X86_64_PC16 foo 0xCF +// CHECK-NEXT: 0xD1 R_X86_64_PC8 foo 0xD1 +// CHECK-NEXT: 0xD3 R_X86_64_PLT32 foo 0xFFFFFFFFFFFFFFFC +// CHECK-NEXT: 0xDA R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFB +// CHECK-NEXT: 0xE1 R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0x3 +// CHECK-NEXT: 0xE8 R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0xFFFFFFFFFFFFFFFC +// CHECK-NEXT: 0xED R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0x1 +// CHECK-NEXT: 0xF3 R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_ 0x2 +// CHECK-NEXT: 0xFB R_X86_64_SIZE64 blah 0x0 +// CHECK-NEXT: 0x103 R_X86_64_SIZE64 blah 0x20 +// CHECK-NEXT: 0x10B R_X86_64_SIZE64 blah 0xFFFFFFFFFFFFFFE0 +// CHECK-NEXT: 0x116 R_X86_64_SIZE32 blah 0x0 +// CHECK-NEXT: 0x11D R_X86_64_SIZE32 blah 0x20 +// CHECK-NEXT: 0x124 R_X86_64_SIZE32 blah 
0xFFFFFFFFFFFFFFE0 +// CHECK-NEXT: 0x128 R_X86_64_GOTPCREL foo 0x0 +// CHECK-NEXT: 0x12C R_X86_64_PLT32 foo 0x0 +// CHECK-NEXT: 0x140 R_X86_64_32 .text 0x140 +// CHECK-NEXT: 0x145 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE +// CHECK-NEXT: 0x147 R_X86_64_PC32 pr23272 0x0 // CHECK-NEXT: ] // CHECK-NEXT: } diff --git a/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir b/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir new file mode 100644 index 0000000000000..c69bc1b5eca64 --- /dev/null +++ b/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=none %s -o - | FileCheck %s +# REQUIRES: riscv64-registered-target + +# During the MachineVerifier, it assumes that used registers have been defined +# In this test case, while $v12_v13_v14_v15_v16 covers $v14_v15, +# $v14_v15 is not a sub-register of $v14m2 even though they share the same register. +# This corner case can be resolved by checking the register using RegUnit. + +... +--- +name: func +tracksRegLiveness: true +tracksDebugUserValues: true +body: | + bb.0: + liveins: $v0, $v8, $v9, $v10, $v11 + + ; CHECK-LABEL: name: func + ; CHECK: liveins: $v0, $v8, $v9, $v10, $v11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $v16m2 = PseudoVMV_V_I_M2 undef renamable $v16m2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16 + renamable $v16m2 = PseudoVMV_V_I_M2 undef renamable $v16m2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16 + +... 
diff --git a/llvm/test/Transforms/Inline/LoongArch/inline-target-features.ll b/llvm/test/Transforms/Inline/LoongArch/inline-target-features.ll new file mode 100644 index 0000000000000..f7a37015e07fc --- /dev/null +++ b/llvm/test/Transforms/Inline/LoongArch/inline-target-features.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -mtriple=loongarch64-unknown-linux-gnu -S -passes=inline | FileCheck %s +; RUN: opt < %s -mtriple=loongarch64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "loongarch64-unknown-linux-gnu" + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) #0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) @baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +attributes #0 = { "target-cpu"="generic-la64" "target-features"="+f,+d" } +attributes #1 = { "target-cpu"="generic-la64" "target-features"="+f,+d,+lsx,+lasx" } diff --git a/llvm/test/Transforms/Inline/LoongArch/lit.local.cfg b/llvm/test/Transforms/Inline/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000..cc24278acbb41 --- /dev/null +++ b/llvm/test/Transforms/Inline/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not "LoongArch" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll new file mode 100644 index 0000000000000..d9c105f753e26 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--version 5 +; RUN: opt -mtriple=amdgcn-- -passes=instcombine -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s + +define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1:[0-9]+]] +; OPT-NEXT: store i32 [[TMP]], ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: store i32 32, ptr addrspace(1) [[ARG]], align 4 +; OPT-W32-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 64, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +bb: + %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 + store i32 %tmp, ptr 
addrspace(1) %arg, align 4 + ret void +} + +define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] +; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1 +; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-W32-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 2, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +bb: + %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 + %tmp1 = icmp ugt i32 %tmp, 32 + %tmp2 = select i1 %tmp1, i32 2, i32 1 + store i32 %tmp2, ptr addrspace(1) %arg + ret void +} + +define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] +; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] +; OPT: [[BB2]]: +; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: br label %[[BB3]] +; OPT: [[BB3]]: +; OPT-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; 
OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: br i1 false, label %[[BB2:.*]], label %[[BB3:.*]] +; OPT-W32: [[BB2]]: +; OPT-W32-NEXT: br label %[[BB3]] +; OPT-W32: [[BB3]]: +; OPT-W32-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB3:.*]] +; OPT-W64: [[BB2]]: +; OPT-W64-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: br label %[[BB3]] +; OPT-W64: [[BB3]]: +; OPT-W64-NEXT: ret void +; +bb: + %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 + %tmp1 = icmp ugt i32 %tmp, 32 + br i1 %tmp1, label %bb2, label %bb3 + +bb2: ; preds = %bb + store i32 1, ptr addrspace(1) %arg, align 4 + br label %bb3 + +bb3: ; preds = %bb2, %bb + ret void +} + +declare i32 @llvm.amdgcn.wavefrontsize() #0 + +attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index 30b9fca6e97ad..c7bbc8ab56f9a 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -5044,11 +5044,9 @@ define i1 @isnormal_logical_select_0_fmf1(half %x) { define i1 @and_fcmp_reassoc1(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc1( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[RETVAL]], [[CMP1]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b @@ -5059,11 +5057,9 @@ define i1 @and_fcmp_reassoc1(i1 %x, double %a, double %b) { define i1 
@and_fcmp_reassoc2(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc2( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[RETVAL]], [[CMP1]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b @@ -5074,11 +5070,9 @@ define i1 @and_fcmp_reassoc2(i1 %x, double %a, double %b) { define i1 @and_fcmp_reassoc3(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc3( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b @@ -5089,11 +5083,9 @@ define i1 @and_fcmp_reassoc3(i1 %x, double %a, double %b) { define i1 @and_fcmp_reassoc4(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc4( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll index 00ee7bf643286..d07c2e6a5be52 100644 --- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll +++ 
b/llvm/test/Transforms/InstCombine/eq-of-parts.ll @@ -1441,11 +1441,7 @@ define i1 @ne_optimized_highbits_cmp_todo_overlapping(i32 %x, i32 %y) { define i1 @and_trunc_i1(i8 %a1, i8 %a2) { ; CHECK-LABEL: @and_trunc_i1( -; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[A1:%.*]], [[A2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[XOR]], 2 -; CHECK-NEXT: [[LOBIT:%.*]] = trunc i8 [[XOR]] to i1 -; CHECK-NEXT: [[LOBIT_INV:%.*]] = xor i1 [[LOBIT]], true -; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[LOBIT_INV]] +; CHECK-NEXT: [[AND:%.*]] = icmp eq i8 [[A1:%.*]], [[A2:%.*]] ; CHECK-NEXT: ret i1 [[AND]] ; %xor = xor i8 %a1, %a2 @@ -1494,10 +1490,7 @@ define i1 @and_trunc_i1_wrong_operands(i8 %a1, i8 %a2, i8 %a3) { define i1 @or_trunc_i1(i64 %a1, i64 %a2) { ; CHECK-LABEL: @or_trunc_i1( -; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[A2:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[XOR]], 1 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[XOR]] to i1 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], [[TRUNC]] +; CHECK-NEXT: [[OR:%.*]] = icmp ne i64 [[A2:%.*]], [[A1:%.*]] ; CHECK-NEXT: ret i1 [[OR]] ; %xor = xor i64 %a2, %a1 @@ -1538,3 +1531,28 @@ define i1 @or_trunc_i1_wrong_operands(i64 %a1, i64 %a2, i64 %a3) { %or = or i1 %cmp, %trunc ret i1 %or } + +define i1 @jv_identical(i64 %arg1, i64 %arg2) { +; CHECK-LABEL: @jv_identical( +; CHECK-NEXT: [[ARG1_TRUNC:%.*]] = trunc i64 [[ARG1:%.*]] to i8 +; CHECK-NEXT: [[ARG2_TRUNC:%.*]] = trunc i64 [[ARG2:%.*]] to i8 +; CHECK-NEXT: [[EQ1:%.*]] = icmp eq i8 [[ARG1_TRUNC]], [[ARG2_TRUNC]] +; CHECK-NEXT: [[DOTUNSHIFTED:%.*]] = xor i64 [[ARG2]], [[ARG1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[DOTUNSHIFTED]], 65536 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[EQ1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %arg1.trunc = trunc i64 %arg1 to i8 + %arg1.shift = lshr i64 %arg1, 16 + %arg1.shift.trunc = trunc i64 %arg1.shift to i16 + %arg2.trunc = trunc i64 %arg2 to i8 + %arg2.shift = lshr i64 %arg2, 16 + %arg2.shift.trunc = trunc i64 %arg2.shift to i16 + 
%eq1 = icmp eq i8 %arg1.trunc, %arg2.trunc + %eq2 = icmp eq i16 %arg1.shift.trunc, %arg2.shift.trunc + %and1 = and i1 %eq1, %eq2 + %xor = xor i64 %arg2, %arg1 + %cmp = icmp ult i64 %xor, 4294967296 + %and2 = and i1 %cmp, %and1 + ret i1 %and2 +} diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll index f46940ff060d4..a4296a326c4bc 100644 --- a/llvm/test/Transforms/InstCombine/fptrunc.ll +++ b/llvm/test/Transforms/InstCombine/fptrunc.ll @@ -90,6 +90,19 @@ define half @fptrunc_select_true_val_extra_use(half %x, float %y, i1 %cond) { ret half %r } +define half @fptrunc_max(half %arg) { +; CHECK-LABEL: @fptrunc_max( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt half [[ARG:%.*]], 0xH0000 +; CHECK-NEXT: [[NARROW_SEL:%.*]] = select i1 [[CMP]], half 0xH0000, half [[ARG]] +; CHECK-NEXT: ret half [[NARROW_SEL]] +; + %ext = fpext half %arg to double + %cmp = fcmp olt double %ext, 0.000000e+00 + %max = select i1 %cmp, double 0.000000e+00, double %ext + %trunc = fptrunc double %max to half + ret half %trunc +} + ; Negative test - this would require an extra instruction. 
define half @fptrunc_select_true_val_extra_use_2(half %x, float %y, i1 %cond) { diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll index a2842f7a45f59..193fe4b5cc722 100644 --- a/llvm/test/Transforms/InstCombine/or-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll @@ -54,7 +54,7 @@ define i1 @PR41069(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] -; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[UNO1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[R]] ; %uno1 = fcmp uno double %a, %b @@ -87,7 +87,7 @@ define i1 @PR41069_commute(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069_commute( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] -; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[UNO1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[R]] ; %uno1 = fcmp uno double %a, %b @@ -4608,11 +4608,9 @@ define i1 @intersect_fmf_4(double %a, double %b) { define i1 @or_fcmp_reassoc1(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc1( -; CHECK-NEXT: [[OR:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[OR:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[OR]], [[CMP1:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[RETVAL]], [[CMP2]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b @@ -4623,11 +4621,9 @@ define i1 @or_fcmp_reassoc1(i1 %x, double %a, double %b) { define i1 @or_fcmp_reassoc2(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc2( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; 
CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[RETVAL]], [[CMP1]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b @@ -4638,11 +4634,9 @@ define i1 @or_fcmp_reassoc2(i1 %x, double %a, double %b) { define i1 @or_fcmp_reassoc3(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc3( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b @@ -4653,11 +4647,9 @@ define i1 @or_fcmp_reassoc3(i1 %x, double %a, double %b) { define i1 @or_fcmp_reassoc4(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc4( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 414f39d557044..ab541f6fa94e6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -60,7 +60,7 @@ define i32 
@reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb: -; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; IF-EVL-OUTLOOP-NEXT: No successors ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: scalar.ph: @@ -110,7 +110,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: -; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: @@ -156,7 +156,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: -; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: @@ -202,7 +202,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: -; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll 
b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index db2c8188a7cb3..d5f1b46bd5421 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -18,7 +18,7 @@ ; CHECK: br label %vector.body ; ; CHECK: vector.body: -; CHECK: br i1 %8, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]] +; CHECK: br i1 {{.+}}, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]] ; ; CHECK: middle.block: ; CHECK: br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] @@ -30,10 +30,10 @@ ; CHECK: br label %vec.epilog.vector.body ; ; CHECK: vec.epilog.vector.body: -; CHECK: br i1 %12, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] +; CHECK: br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] ; ; CHECK: vec.epilog.middle.block: -; CHECK: br i1 %cmp.n12, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] +; CHECK: br i1 %cmp.n11, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] ; ; CHECK: vec.epilog.scalar.ph: ; CHECK: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index bcacfb358ec05..517de8be5c998 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -48,8 +48,8 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) -; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry 
], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors @@ -125,9 +125,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) -; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) -; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) +; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2 from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors @@ -205,8 +205,8 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) -; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) +; CHECK-NEXT: 
IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors ; CHECK-NEXT: } ; @@ -279,8 +279,8 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) -; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 8ae538cf63986..d0c811763a522 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -85,7 +85,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -172,7 +172,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ 
%recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -235,7 +235,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -244,7 +244,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 @@ -355,7 +355,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -452,7 +452,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: 
Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next ; CHECK-NEXT: No successors @@ -539,7 +539,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK: IR %ec = icmp ugt i64 %iv, 3 ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index a71666d8c3167..dd58dc81ccedd 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -227,7 +227,7 @@ exit: ; DBG-EMPTY: ; DBG-NEXT: ir-bb: ; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]>) +; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; DBG: IR %ec = icmp slt i32 %iv.next.trunc, %n ; DBG-NEXT: No successors ; DBG-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll index 20710db19ba90..d3582ae16d1c1 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll +++ 
b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll @@ -1,8 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_nuw_nsw -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_nuw_nsw( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -25,8 +43,25 @@ exit: ; preds = %for.body } define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_nsw -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_nsw( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label 
%[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -49,8 +84,25 @@ exit: ; preds = %for.body } define i64 @select_icmp_nuw(ptr %a, ptr %b, i64 %ii, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_nuw -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_nuw( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 
[[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -73,8 +125,25 @@ exit: ; preds = %for.body } define i64 @select_icmp_noflag(ptr %a, ptr %b, i64 %ii, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_noflag -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_noflag( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index b94e9f99868ef..2eb63db2b0247 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ 
b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK @@ -11,8 +12,31 @@ ; %cmp.sgt = icmp sgt i32 %n, 0 ; and successfully vectorize the case without a runtime-check. define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { -; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit( +; CHECK-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 
[[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RDX_LCSSA]] ; entry: %cmp.sgt = icmp sgt i32 %n, 0 @@ -48,8 +72,24 @@ exit: ; preds = %for.body, %entry ; %exitcond.not = icmp eq i64 %inc, 20000 ; and successfully vectorize the case without a runtime-check. define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { -; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] ; entry: br label %for.body @@ -73,8 +113,24 @@ exit: ; preds = %for.body ; Without loop guard, the maximum constant trip count that can be vectorized is ; the signed maximum value of reduction 
type. define i32 @select_fcmp_max_valid_const_ub(ptr %a) { -; CHECK-LABEL: define i32 @select_fcmp_max_valid_const_ub -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @select_fcmp_max_valid_const_ub( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] ; entry: br label %for.body @@ -105,8 +161,30 @@ exit: ; preds = %for.body ; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the ; sentinel value), and need a runtime-check to vectorize this case. 
define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr %a, i64 %n) { -; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RDX_LCSSA]] ; entry: %cmp.sgt = icmp sgt i64 %n, 0 @@ -137,8 +215,31 @@ exit: ; preds = %for.body, %entry ; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the ; sentinel value), and need a runtime-check to vectorize this case. 
define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { -; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard( +; CHECK-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RDX_LCSSA]] ; entry: %cmp.not = icmp eq i32 %n, 0 @@ -173,8 +274,24 @@ exit: ; preds = %for.body, %entry ; Hence, the i32 will most certainly wrap and hit the sentinel value, and we ; cannot vectorize this case. 
define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { -; CHECK-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] ; entry: br label %for.body @@ -198,8 +315,27 @@ exit: ; preds = %for.body ; Forbidding vectorization of the FindLastIV pattern involving a truncated ; induction variable in the absence of any loop guard. 
define i32 @not_vectorized_select_iv_icmp_no_guard(ptr %a, ptr %b, i32 %start, i32 %n) { -; CHECK-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i32 [[COND_LCSSA]] ; entry: %wide.trip.count = zext i32 %n to i64 @@ -228,8 +364,24 @@ exit: ; preds = %for.body ; vectorizer is unable to guarantee that the induction variable is monotonic ; increasing. 
define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { -; CHECK-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] ; entry: br label %for.body @@ -254,8 +406,33 @@ exit: ; preds = %for.body ; instruction is smaller than the trip count type before extension, overflow ; could still occur. 
define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(ptr %a, ptr %b, i16 %start, i32 %n) { -; CHECK-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16 +; CHECK-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i16 [[RDX_0_LCSSA]] ; entry: %cmp9 = icmp sgt i32 %n, 0 diff --git 
a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 6108dbd0b191e..b989b8bbe5229 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -1,10 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK define i64 @select_icmp_const_1(ptr %a, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_const_1 -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_const_1( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP0]], 3 +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -25,8 +41,23 @@ exit: ; preds = %for.body } define i64 @select_icmp_const_2(ptr %a, i64 %n) { -; CHECK-LABEL: define 
i64 @select_icmp_const_2 -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_const_2( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP0]], 3 +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[RDX]], i64 [[IV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -47,8 +78,23 @@ exit: ; preds = %for.body } define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_const_3_variable_rdx_start -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_const_3_variable_rdx_start( +; CHECK-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP0]], 3 +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw 
i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -69,8 +115,23 @@ exit: ; preds = %for.body } define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { -; CHECK-LABEL: define i64 @select_fcmp_const_fast -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_fcmp_const_fast( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ueq float [[TMP0]], 3.000000e+00 +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -91,8 +152,23 @@ exit: ; preds = %for.body } define i64 @select_fcmp_const(ptr %a, i64 %n) { -; CHECK-LABEL: define i64 @select_fcmp_const -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_fcmp_const( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 
[[COND:%.*]], %[[FOR_BODY]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ueq float [[TMP0]], 3.000000e+00 +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -113,8 +189,25 @@ exit: ; preds = %for.body } define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], 
%[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -137,8 +230,25 @@ exit: ; preds = %for.body } define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: define i64 @select_fcmp -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_fcmp( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -161,8 +271,27 @@ exit: ; preds = %for.body } define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: define i64 @select_icmp_min_valid_iv_start -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @select_icmp_min_valid_iv_start( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ 
-9223372036854775807, %[[ENTRY]] ] +; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1 +; CHECK-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -189,8 +318,27 @@ exit: ; preds = %for.body ; Negative tests define float @not_vectorized_select_float_induction_icmp(ptr %a, ptr %b, float %rdx.start, i64 %n) { -; CHECK-LABEL: @not_vectorized_select_float_induction_icmp -; CHECK-NOT: vector.body: +; CHECK-LABEL: define float @not_vectorized_select_float_induction_icmp( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], float [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FIV:%.*]] = phi float [ [[CONV3:%.*]], %[[FOR_BODY]] ], [ 0.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr 
[[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], float [[FIV]], float [[RDX]] +; CHECK-NEXT: [[CONV3]] = fadd float [[FIV]], 1.000000e+00 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi float [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret float [[COND_LCSSA]] ; entry: br label %for.body @@ -215,8 +363,23 @@ exit: ; preds = %for.body } define i64 @not_vectorized_select_decreasing_induction_icmp_const_start(ptr %a) { -; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_const_start -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_const_start( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[ENTRY]] ], [ [[DEC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP0]], 3 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[DEC]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] ; entry: br label 
%for.body @@ -237,8 +400,25 @@ exit: ; preds = %for.body } define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_non_const_start -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_0_IN10:%.*]] = phi i64 [ [[IV:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[IV]] = add nsw i64 [[I_0_IN10]], -1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[I_0_IN10]], 1 +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -263,8 +443,27 @@ exit: ; preds = %for.body ; The sentinel value for increasing-IV vectorization is -LONG_MAX, and since ; the IV hits this value, it is impossible to vectorize this case. 
define i64 @not_vectorized_select_icmp_iv_out_of_bound(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: @not_vectorized_select_icmp_iv_out_of_bound -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @not_vectorized_select_icmp_iv_out_of_bound( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ -9223372036854775808, %[[ENTRY]] ] +; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1 +; CHECK-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body @@ -291,8 +490,23 @@ exit: ; preds = %for.body ; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since ; the IV hits this value, it is impossible to vectorize this case. 
define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) { -; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[DEC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[TMP0]], 3 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[DEC]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] ; entry: br label %for.body @@ -313,8 +527,25 @@ exit: ; preds = %for.body } define i64 @not_vectorized_select_icmp_non_const_iv_start_value(ptr %a, ptr %b, i64 %ivstart, i64 %rdx.start, i64 %n) { -; CHECK-LABEL: define i64 @not_vectorized_select_icmp_non_const_iv_start_value -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @not_vectorized_select_icmp_non_const_iv_start_value( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[IVSTART:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[IVSTART]], %[[ENTRY]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], 
%[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/select-min-index.ll b/llvm/test/Transforms/LoopVectorize/select-min-index.ll index 3981d620eea28..1ce88f7221451 100644 --- a/llvm/test/Transforms/LoopVectorize/select-min-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-min-index.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s ; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s @@ -5,8 +6,25 @@ ; Test cases for selecting the index with the minimum value. 
define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { -; CHECK-LABEL: @test_vectorize_select_umin_idx( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -30,8 +48,27 @@ exit: } define i64 @test_vectorize_select_umin_idx_all_exit_inst(ptr %src, ptr %umin, i64 %n) { -; CHECK-LABEL: @test_vectorize_select_umin_idx_all_exit_inst( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_all_exit_inst( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[UMIN:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 
[[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES_UMIN:%.*]] = phi i64 [ [[MIN_VAL_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: store i64 [[RES_UMIN]], ptr [[UMIN]], align 4 +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -57,8 +94,25 @@ exit: } define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { -; CHECK-LABEL: @test_vectorize_select_umin_idx_min_ops_switched( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_min_ops_switched( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[L]], i64 [[MIN_VAL]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: 
[[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -82,8 +136,26 @@ exit: } define i64 @test_not_vectorize_select_no_min_reduction(ptr %src, i64 %n) { -; CHECK-LABEL: @test_not_vectorize_select_no_min_reduction( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_not_vectorize_select_no_min_reduction( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -109,8 +181,23 @@ exit: define i64 @test_not_vectorize_cmp_value(i64 %x, i64 %n) { -; CHECK-LABEL: @test_not_vectorize_cmp_value( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_not_vectorize_cmp_value( +; CHECK-SAME: i64 [[X:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; 
CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[X]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 0) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -132,8 +219,24 @@ exit: } define i32 @test_vectorize_select_umin_idx_with_trunc(i64 %n) { -; CHECK-LABEL: @test_vectorize_select_umin_idx_with_trunc( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i32 @test_vectorize_select_umin_idx_with_trunc( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], 0 +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 0) +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[TRUNC]], i32 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], 
label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[RES]] ; entry: br label %loop @@ -156,8 +259,23 @@ exit: } define ptr @test_with_ptr_index(ptr %start, ptr %end) { -; CHECK-LABEL: @test_with_ptr_index( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define ptr @test_with_ptr_index( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP7_US:%.*]] = icmp ult i64 0, 0 +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 0) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP7_US]], ptr [[IV]], ptr [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = getelementptr i32, ptr [[IV]], i64 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi ptr [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret ptr [[RES]] ; entry: br label %loop @@ -179,8 +297,20 @@ exit: } define void @pointer_index(ptr %start) { -; CHECK-LABEL: @pointer_index( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define void @pointer_index( +; CHECK-SAME: ptr [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IDX:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_SELECT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP_I_I_I_I2531:%.*]] = icmp ult i16 0, 0 +; CHECK-NEXT: [[PTR_SELECT]] = select i1 
[[CMP_I_I_I_I2531]], ptr [[PTR_IV]], ptr [[PTR_IDX]] +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i16, ptr [[PTR_IV]], i64 1 +; CHECK-NEXT: [[CMP_I_I10_NOT_I_I_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], null +; CHECK-NEXT: br i1 [[CMP_I_I10_NOT_I_I_I]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -199,8 +329,23 @@ exit: } define ptr @pointer_index_2(ptr %start, ptr %end) { -; CHECK-LABEL: @pointer_index_2( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define ptr @pointer_index_2( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ult i16 0, [[MIN_VAL]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = call i16 @llvm.umin.i16(i16 0, i16 [[MIN_VAL]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP_I_I_I_I]], ptr [[PTR_IV]], ptr [[MIN_IDX]] +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i16, ptr [[PTR_IV]], i64 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi ptr [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret ptr [[RES]] ; entry: br label %loop @@ -222,8 +367,25 @@ exit: } define i64 @test_no_vectorize_select_iv_decrement(ptr %src) { -; CHECK-LABEL: @test_no_vectorize_select_iv_decrement( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_no_vectorize_select_iv_decrement( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi 
i64 [ 1000, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], -1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -247,8 +409,25 @@ exit: } define i64 @test_no_vectorize_select_iv_sub(ptr %src) { -; CHECK-LABEL: @test_no_vectorize_select_iv_sub( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_no_vectorize_select_iv_sub( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1000, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = sub i64 [[IV]], 1 +; 
CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -272,8 +451,25 @@ exit: } define i64 @test_no_vectorize_select_iv_mul(ptr %src) { -; CHECK-LABEL: @test_no_vectorize_select_iv_mul( -; CHECK-NOT: vector.body: +; CHECK-LABEL: define i64 @test_no_vectorize_select_iv_mul( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = mul i64 [[IV]], 2 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 6bb20a301e0ad..195f6a48640e5 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -171,7 +171,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph 
; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -476,7 +476,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -716,7 +716,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]>) +; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1111,7 +1111,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]>) +; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1119,7 +1119,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]>) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; 
CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll index a0b390011faa6..3bf13b76a9332 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll @@ -7,7 +7,7 @@ define void @test() { ; CHECK-NEXT: [[ADD:%.*]] = add i32 1, 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[ADD]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = icmp samesign ult <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[ICMP:%.*]] = icmp samesign ult i32 0, 0 +; CHECK-NEXT: [[ICMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64 ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 [[ZEXT]] @@ -16,8 +16,6 @@ define void @test() { ; CHECK-NEXT: [[CALL:%.*]] = call i32 null(<2 x double> zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[CALL]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> poison, <4 x i1> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4) ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll new file mode 100644 index 0000000000000..4ad02d47fb385 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s + +define i64 @test() { +; CHECK-LABEL: define i64 @test() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OR54_I_I_6:%.*]] = or i32 0, 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]]) +; CHECK-NEXT: ret i64 [[TMP4]] +; +entry: + %xor148.2.i = xor i32 0, 0 + %conv193.i = zext i32 %xor148.2.i to i64 + %conv193.1.i = zext i32 %xor148.2.i to i64 + %or194.1.i = or i64 %conv193.i, %conv193.1.i + %xor148.2.i.1 = xor i32 0, 0 + %conv193.i.1 = zext i32 %xor148.2.i.1 to i64 + %or194.i.1 = or i64 %or194.1.i, %conv193.i.1 + %conv193.1.i.1 = zext i32 %xor148.2.i.1 to i64 + %or194.1.i.1 = or i64 %or194.i.1, %conv193.1.i.1 + %xor148.2.i.2 = xor i32 0, 0 + %conv193.i.2 = zext i32 %xor148.2.i.2 to i64 + %or194.i.2 = or i64 %or194.1.i.1, %conv193.i.2 + %conv193.1.i.2 = zext i32 %xor148.2.i.2 to i64 + %or194.1.i.2 = or i64 %or194.i.2, %conv193.1.i.2 + %xor148.2.i.3 = xor i32 0, 0 + %conv193.i.3 = zext i32 %xor148.2.i.3 to i64 + %or194.i.3 = or i64 %or194.1.i.2, %conv193.i.3 + %conv193.1.i.3 = zext i32 %xor148.2.i.3 to i64 + %or194.1.i.3 = or i64 %or194.i.3, %conv193.1.i.3 + %xor148.2.i.4 = xor i32 0, 0 + %conv193.i.4 = zext i32 %xor148.2.i.4 to i64 + %or194.i.4 = or i64 %or194.1.i.3, %conv193.i.4 + %conv193.1.i.4 = zext i32 %xor148.2.i.4 to i64 + %or194.1.i.4 = or i64 %or194.i.4, %conv193.1.i.4 + %xor148.2.i.5 = xor i32 0, 0 + %conv193.i.5 = zext i32 %xor148.2.i.5 to i64 + %or194.i.5 = 
or i64 %or194.1.i.4, %conv193.i.5 + %conv193.1.i.5 = zext i32 %xor148.2.i.5 to i64 + %or194.1.i.5 = or i64 %or194.i.5, %conv193.1.i.5 + %xor148.2.i.6 = xor i32 0, 0 + %conv193.i.6 = zext i32 %xor148.2.i.6 to i64 + %or194.i.6 = or i64 %or194.1.i.5, %conv193.i.6 + %or54.i.i.6 = or i32 %xor148.2.i.6, 0 + %conv193.1.i.6 = zext i32 %or54.i.i.6 to i64 + %xor148.2.i.7 = xor i32 0, 0 + %conv193.i.7 = zext i32 %xor148.2.i.7 to i64 + %0 = or i64 %or194.i.6, %conv193.i.7 + %conv193.1.i.7 = zext i32 %xor148.2.i.7 to i64 + %1 = or i64 %0, %conv193.1.i.7 + %or194.1.i.7 = or i64 %1, %conv193.1.i.6 + ret i64 %or194.1.i.7 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll new file mode 100644 index 0000000000000..6d22bb06d5e03 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-300 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test() { +; CHECK-LABEL: define void @test() { +; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> , i64 [[XOR108_I_I_I]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v4i64(<16 x i64> [[TMP4]], <4 x i64> [[TMP2]], i64 8) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = 
trunc <16 x i64> [[TMP7]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i1> [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: ret void +; + %xor108.i.i.i = xor i64 0, 1 + %conv115.i.i.i = trunc i64 %xor108.i.i.i to i16 + %add.i.i.i.i = or i16 %conv115.i.i.i, 0 + %add.i.frozen.i.i.i = freeze i16 %add.i.i.i.i + %.cmp.not.i.i.i = icmp eq i16 %add.i.frozen.i.i.i, 0 + %cond.i1002.1.i.i.i = lshr i64 0, 0 + %conv115.1.i.i.i = trunc i64 %cond.i1002.1.i.i.i to i16 + %add.i.1.i.i.i = or i16 %conv115.1.i.i.i, 0 + %add.i.frozen.1.i.i.i = freeze i16 %add.i.1.i.i.i + %.cmp.not.1.i.i.i = icmp eq i16 %add.i.frozen.1.i.i.i, 0 + %cond.i1002.2.i.i.i = lshr i64 %xor108.i.i.i, 0 + %conv115.2.i.i.i = trunc i64 %cond.i1002.2.i.i.i to i16 + %add.i.2.i.i.i = or i16 %conv115.2.i.i.i, 0 + %add.i.frozen.2.i.i.i = freeze i16 %add.i.2.i.i.i + %.cmp.not.2.i.i.i = icmp eq i16 %add.i.frozen.2.i.i.i, 0 + %cond.i1002.3.i.i.i = lshr i64 0, 0 + %conv115.3.i.i.i = trunc i64 %cond.i1002.3.i.i.i to i16 + %add.i.3.i.i.i = or i16 %conv115.3.i.i.i, 0 + %add.i.frozen.3.i.i.i = freeze i16 %add.i.3.i.i.i + %.cmp.not.3.i.i.i = icmp eq i16 %add.i.frozen.3.i.i.i, 0 + %conv115.i.i.i.1 = trunc i64 %xor108.i.i.i to i16 + %add.i.i.i.i.1 = or i16 %conv115.i.i.i.1, 0 + %add.i.frozen.i.i.i.1 = freeze i16 %add.i.i.i.i.1 + %.cmp.not.i.i.i.1 = icmp eq i16 %add.i.frozen.i.i.i.1, 0 + %cond.i1002.1.i.i.i.1 = lshr i64 0, 0 + %conv115.1.i.i.i.1 = trunc i64 %cond.i1002.1.i.i.i.1 to i16 + %add.i.1.i.i.i.1 = or i16 %conv115.1.i.i.i.1, 0 + %add.i.frozen.1.i.i.i.1 = freeze i16 %add.i.1.i.i.i.1 + %.cmp.not.1.i.i.i.1 = icmp eq i16 %add.i.frozen.1.i.i.i.1, 0 + %cond.i1002.2.i.i.i.1 = lshr i64 0, 0 + %conv115.2.i.i.i.1 = trunc i64 %cond.i1002.2.i.i.i.1 to i16 + %add.i.2.i.i.i.1 = or i16 %conv115.2.i.i.i.1, 0 + %add.i.frozen.2.i.i.i.1 = 
freeze i16 %add.i.2.i.i.i.1 + %.cmp.not.2.i.i.i.1 = icmp eq i16 %add.i.frozen.2.i.i.i.1, 0 + %cond.i1002.3.i.i.i.1 = lshr i64 0, 0 + %conv115.3.i.i.i.1 = trunc i64 %cond.i1002.3.i.i.i.1 to i16 + %add.i.3.i.i.i.1 = or i16 %conv115.3.i.i.i.1, 0 + %add.i.frozen.3.i.i.i.1 = freeze i16 %add.i.3.i.i.i.1 + %.cmp.not.3.i.i.i.1 = icmp eq i16 %add.i.frozen.3.i.i.i.1, 0 + %conv115.i.i.i.2 = trunc i64 %xor108.i.i.i to i16 + %add.i.i.i.i.2 = or i16 %conv115.i.i.i.2, 0 + %add.i.frozen.i.i.i.2 = freeze i16 %add.i.i.i.i.2 + %.cmp.not.i.i.i.2 = icmp eq i16 %add.i.frozen.i.i.i.2, 0 + %cond.i1002.1.i.i.i.2 = lshr i64 0, 0 + %conv115.1.i.i.i.2 = trunc i64 %cond.i1002.1.i.i.i.2 to i16 + %add.i.1.i.i.i.2 = or i16 %conv115.1.i.i.i.2, 0 + %add.i.frozen.1.i.i.i.2 = freeze i16 %add.i.1.i.i.i.2 + %.cmp.not.1.i.i.i.2 = icmp eq i16 %add.i.frozen.1.i.i.i.2, 0 + %cond.i1002.2.i.i.i.2 = lshr i64 0, 0 + %conv115.2.i.i.i.2 = trunc i64 %cond.i1002.2.i.i.i.2 to i16 + %add.i.2.i.i.i.2 = or i16 %conv115.2.i.i.i.2, 0 + %add.i.frozen.2.i.i.i.2 = freeze i16 %add.i.2.i.i.i.2 + %.cmp.not.2.i.i.i.2 = icmp eq i16 %add.i.frozen.2.i.i.i.2, 0 + %cond.i1002.3.i.i.i.2 = lshr i64 0, 0 + %conv115.3.i.i.i.2 = trunc i64 %cond.i1002.3.i.i.i.2 to i16 + %add.i.3.i.i.i.2 = or i16 %conv115.3.i.i.i.2, 0 + %add.i.frozen.3.i.i.i.2 = freeze i16 %add.i.3.i.i.i.2 + %.cmp.not.3.i.i.i.2 = icmp eq i16 %add.i.frozen.3.i.i.i.2, 0 + %conv115.i.i.i.3 = trunc i64 %xor108.i.i.i to i16 + %add.i.i.i.i.3 = or i16 %conv115.i.i.i.3, 0 + %add.i.frozen.i.i.i.3 = freeze i16 %add.i.i.i.i.3 + %.cmp.not.i.i.i.3 = icmp eq i16 %add.i.frozen.i.i.i.3, 0 + %cond.i1002.1.i.i.i.3 = lshr i64 0, 0 + %conv115.1.i.i.i.3 = trunc i64 %cond.i1002.1.i.i.i.3 to i16 + %add.i.1.i.i.i.3 = or i16 %conv115.1.i.i.i.3, 0 + %add.i.frozen.1.i.i.i.3 = freeze i16 %add.i.1.i.i.i.3 + %.cmp.not.1.i.i.i.3 = icmp eq i16 %add.i.frozen.1.i.i.i.3, 0 + %cond.i1002.2.i.i.i.3 = lshr i64 0, 0 + %conv115.2.i.i.i.3 = trunc i64 %cond.i1002.2.i.i.i.3 to i16 + %add.i.2.i.i.i.3 = or i16 
%conv115.2.i.i.i.3, 0 + %add.i.frozen.2.i.i.i.3 = freeze i16 %add.i.2.i.i.i.3 + %.cmp.not.2.i.i.i.3 = icmp eq i16 %add.i.frozen.2.i.i.i.3, 0 + %cond.i1002.3.i.i.i.3 = lshr i64 0, 0 + %conv115.3.i.i.i.3 = trunc i64 %cond.i1002.3.i.i.i.3 to i16 + %add.i.3.i.i.i.3 = or i16 %conv115.3.i.i.i.3, 0 + %add.i.frozen.3.i.i.i.3 = freeze i16 %add.i.3.i.i.i.3 + %.cmp.not.3.i.i.i.3 = icmp eq i16 %add.i.frozen.3.i.i.i.3, 0 + ret void +} diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll index 405afd5969a41..5c9058b482320 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll @@ -276,21 +276,19 @@ if.false: ; preds = %if.true, %entry } ;; Both of successor 0 and successor 1 have a single predecessor. -;; TODO: Support transform for this case. -define void @single_predecessor(ptr %p, ptr %q, i32 %a) { +define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) { ; CHECK-LABEL: @single_predecessor( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: common.ret: -; CHECK-NEXT: ret void -; CHECK: if.end: -; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4 -; CHECK-NEXT: br label [[COMMON_RET:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1> +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison) 
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32 +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]]) +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3 +; CHECK-NEXT: ret i32 [[DOT]] ; entry: %tobool = icmp ne i32 %a, 0 @@ -298,12 +296,12 @@ entry: if.end: store i32 1, ptr %q - ret void + ret i32 2 if.then: %0 = load i32, ptr %q store i32 %0, ptr %p - ret void + ret i32 3 } ;; Hoist 6 stores. @@ -759,6 +757,44 @@ if.true: ret i32 %res } +;; Not transform if either BB has multiple successors. +define i32 @not_multi_successors(i1 %c1, i32 %c2, ptr %p) { +; CHECK-LABEL: @not_multi_successors( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]] +; CHECK: entry.if: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: switch i32 [[C2:%.*]], label [[COMMON_RET]] [ +; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 1, label [[SW_BB]] +; CHECK-NEXT: ] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL]], [[ENTRY_IF]] ], [ 0, [[SW_BB]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: sw.bb: +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + br i1 %c1, label %entry.if, label %entry.else + +entry.if: ; preds = %entry + %val = load i32, ptr %p, align 4 + switch i32 %c2, label %return [ + i32 0, label %sw.bb + i32 1, label %sw.bb + ] + +entry.else: ; preds = %entry + ret i32 0 + +sw.bb: ; preds = %entry.if, %entry.if + br label %return + +return: ; preds = %sw.bb, %entry.if + %ret = phi i32 [ %val, %entry.if ], [ 0, %sw.bb ] + ret i32 %ret +} + declare i32 @read_memory_only() readonly nounwind willreturn speculatable !llvm.dbg.cu = !{!0} diff --git a/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-string.test b/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-string.test index 26cb9c65eb4ef..3bb15e180065d 100644 --- 
a/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-string.test +++ b/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-string.test @@ -132,11 +132,11 @@ CHECK: 0x[[STRING:[0-9a-f]*]]: DW_TAG_typedef{{.*[[:space:]].*}}DW_AT_type{{.*}} CHECK:DW_TAG_reference_type -CHECK: 0x[[CONST_STR_REF:[0-9a-f]*]]: DW_TAG_reference_type{{.*[[:space:]].*}}DW_AT_type{{.*}}0x[[CONST_STRING:[0-9a-f]*]] "const string" +CHECK: 0x[[CONST_STR_REF:[0-9a-f]*]]: DW_TAG_reference_type{{.*[[:space:]].*}}DW_AT_type{{.*}}0x[[CONST_STRING:[0-9a-f]*]] "const std::__1::string" CHECK:DW_TAG_const_type -CHECK: 0x[[CONST_STRING]]: DW_TAG_const_type{{.*[[:space:]].*}}DW_AT_type{{.*}}0x[[STRING]] "string" +CHECK: 0x[[CONST_STRING]]: DW_TAG_const_type{{.*[[:space:]].*}}DW_AT_type{{.*}}0x[[STRING]] "std::__1::string" CHECK: Compile Unit: @@ -148,7 +148,7 @@ CHECK: DW_AT_high_pc CHECK: DW_AT_name{{.*}}"PrintSize" CHECK: DW_TAG_formal_parameter CHECK: DW_AT_name{{.*}}"String" -CHECK: DW_AT_type{{.*}}0x00000000[[CONST_STR_REF]] "const string &" +CHECK: DW_AT_type{{.*}}0x00000000[[CONST_STR_REF]] "const std::__1::string &" CHECK: Compile Unit: CHECK: DW_TAG_compile_unit diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s index cdb96dd717e94..03b4041f77bce 100644 --- a/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s @@ -1,13 +1,9 @@ # RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx950 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s # CHECK: Iterations: 1 -# CHECK: Instructions: 15 -# CHECK: Total Cycles: 60 -# CHECK: Total uOps: 15 - -v_mfma_ld_scale_b32 v0, v0 -v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] -v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] +# CHECK: Instructions: 129 +# CHECK: Total Cycles: 1069 +# CHECK: Total uOps: 129 v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] @@ -15,7 +11,6 @@ 
v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15] v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 - v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] @@ -23,20 +18,312 @@ v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7] +v_mfma_ld_scale_b32 v0, v0 + +;; FIXME: should have different cycle count depending on whether either matrix is f8 +;; TODO: test vdc/adc +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:2 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:3 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:4 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:3 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:4 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 blgp:1 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 blgp:2 + +;; FIXME: should have different cycle count depending on whether either matrix is f8 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:2 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:3 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:4 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], 
v[4:11], v[4:11], v[0:15] cbsz:1 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:3 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:4 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 + +;; FIXME: should have different cycle count depending on whether either matrix is f8 +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 + +;; FIXME +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:1 +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:2 +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:3 +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:4 + +v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 + +;; FIXME +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:1 +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:2 +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:3 +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:4 + + +;; TODO: These results are wrong +v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x128_fp8_bf8 
v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] +v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] + +v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] +v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] + +v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] +v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] + +v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] +v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] + +v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] +v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] + +v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] +v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] + +v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] +v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] + +v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] +v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] + +v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] +v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] + +v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] + +v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] +v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] + +v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] +v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] + +v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] +v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] + +v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] +v_mfma_f32_4x4x4_16b_bf16 a[0:3], 
v[0:1], v[2:3], a[2:5] + +v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] +v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] + +v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] +v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] + +v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] +v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] + +v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] +v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] + +v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] +v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] + +v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 +v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 + +v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] +v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] + +v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] +v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] + +v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] +v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] + +v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] +v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] + +v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 +v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 + +v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 +v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 + +v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 +v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 + +v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 +v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 + +v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 +v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 + +v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] + 
+v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] + +v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 + +v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 # CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: - - - - 1.00 - - v_mfma_ld_scale_b32 v0, v0 -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15] # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], 
a[0:3] blgp:1 -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7] +# CHECK-NEXT: - - - - 1.00 - - v_mfma_ld_scale_b32 v0, v0 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:2 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:3 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:4 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:3 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:4 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 blgp:2 +# 
CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:3 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:4 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:3 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:4 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0] +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 
v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] +# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] +# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] +# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], 
v[6:7], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 8.00 
v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +# 
CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s index 
dd7ac2734318f..f70ce42d115f2 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s @@ -68,20 +68,20 @@ # ALL: Resource pressure per iteration: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 2.00 3.34 - +# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 3.34 2.00 - # ALL: Resource pressure by instruction: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 1.00 0.34 - addq $44, 64(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - - 0.33 - addq $44, 128(%r14) -# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 - 0.33 - addq $44, 192(%r14) -# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 256(%r14) -# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 - 0.33 - addq $44, 320(%r14) -# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 1.00 0.33 - addq $44, 384(%r14) -# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 - 0.34 - addq $44, 448(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - - 0.33 - addq $44, 512(%r14) -# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 - 0.33 - addq $44, 576(%r14) -# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 640(%r14) +# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 0.34 1.00 - addq $44, 64(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - 0.33 - - addq $44, 128(%r14) +# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 0.33 - - addq $44, 192(%r14) +# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 256(%r14) +# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 0.33 - - addq $44, 320(%r14) +# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 0.33 1.00 - addq $44, 384(%r14) +# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 0.34 - - addq $44, 448(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - 0.33 - - addq $44, 512(%r14) +# ALL-NEXT: - 1.00 0.34 0.33 - - - 
- 1.00 1.00 0.33 - - addq $44, 576(%r14) +# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 640(%r14) # ALL: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s index 5a7563d461cd9..cd46bd31d5fd4 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s @@ -46,15 +46,15 @@ adox (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - - 1.33 - +# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxq (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxq (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s index 9384488f06781..330d2e0952e92 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s @@ -58,19 +58,19 @@ 
aeskeygenassist $22, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - - 2.00 - +# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdec %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdec (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdec (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdeclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdeclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdeclast (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenc %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenc (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenc (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenclast (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - aesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - aesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - aesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - aeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - aeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - aeskeygenassist $22, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s index dc8ad8e46a777..2c9a7c0aebb99 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s @@ -1739,427 +1739,427 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 310.90 275.73 107.33 107.33 19.50 277.73 8.90 18.83 18.50 18.50 0.73 107.00 - +# CHECK-NEXT: 310.90 275.73 107.33 107.33 19.50 277.73 8.90 18.83 18.50 18.50 107.00 0.73 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - 
vaddss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vaesenclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vaesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vaesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vaesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - vaeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - vaeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - vaeskeygenassist $22, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 
- 0.33 - - - - 0.33 - - vandpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, 
%ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %ymm3, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf128 (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf128 (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 
0.33 - vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomiss (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 
0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - vcvtsi2ss %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2ssl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2sd (%rax), %xmm1, 
%xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - 
- vcvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - vdppd $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 
1.50 0.33 0.33 - 0.50 - - - - 0.33 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vextractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vextractps $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 
0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vinsertps $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %ymm2 -# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - vldmxcsr (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vinsertps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %ymm2 +# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - vldmxcsr (%rax) # CHECK-NEXT: - - 0.33 0.33 1.00 - - 0.33 - - - - - vmaskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %xmm0, %xmm2 
-# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - 
vmovapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovd %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - 
- - - - 0.33 - - vmovdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovhlps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovlhps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 
1.00 - - - - - - - - - - - - vmovmskpd %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %ymm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %ymm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %ymm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %ymm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovq %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 
- - - - - - - 0.33 - vmovshdup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - 
- - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm0, %ymm1, %ymm2 -# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - 
- - - 0.33 - - vpackusdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw 
(%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 
0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - vpcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - vpcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - vpcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - vpcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2f128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm1, %xmm2 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrd $1, %xmm0, %ecx @@ -2169,268 +2169,268 @@ vzeroupper # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrw $1, %xmm0, (%rax) # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %xmm1, 
%xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrb $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrb $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrd $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrd $1, (%rax), 
%xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrq $1, %rax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrq $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrw $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm1, 
%xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - 0.33 - - vpminuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw 
%xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm1, 
%xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw 
$1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpslld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrad %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %ymm1 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - 
vpxor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundsd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 
1.00 - - - - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm0, %xmm2 
-# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - vstmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomiss (%rax), %xmm1 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm1, 
%ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - 0.73 - - vzeroall +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - - 0.73 - vzeroall # CHECK-NEXT: - - - - - - - - - - - - - vzeroupper diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s index 96f66d1aad6c5..8c2e4911bf73d 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s @@ -779,308 +779,308 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 - 65.33 - +# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 65.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti128 
(%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti128 (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - 1.00 
- - - - - - - vinserti128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti128 $1, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm0 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsd %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - 
- vpackusdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm1, 
%ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - 
vpblendd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %xmm0 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vpcmpgtq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2i128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 
- 1.33 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - 
- - - - 0.33 - vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - 
- 0.33 - - vpmaskmovq (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %ymm0, %ymm1, %ymm2 
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %ymm0, %ecx # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq 
(%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), 
%ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 
- 1.00 - - - - 0.33 - - vpsadbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpslld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - 0.33 - - vpsrad (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 
- - vpsrlvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm1, %ymm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm1, 
%ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s index 2f9fe5dd23a17..ae2185aae1a23 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s @@ -58,19 +58,19 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - - 2.00 - +# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s index 5c8d5e74e7eda..8152d18f56c30 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s @@ -68,23 +68,23 @@ vpdpwssds (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - - 2.67 - +# CHECK-NEXT: 8.00 8.00 
2.67 2.67 - - - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s index 4ed882a37a68e..16c4fdf7e1b4c 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s @@ -93,33 +93,33 @@ tzcnt (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 5.33 4.33 - +# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 4.33 5.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnl %eax, %ebx, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnl %eax, %ebx, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnl (%rax), %ebx, %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnq %rax, %rbx, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnq %rax, %rbx, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrq %rax, (%rbx), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsil %eax, %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrq %rax, (%rbx), %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsil %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsil 
(%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsiq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsiq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsiq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrq (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntw %ax, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s index 559ca83906cb7..a444369434dda 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s @@ 
-108,39 +108,39 @@ shrx %rax, (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 0.40 5.33 - +# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 5.33 0.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhil %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhil %eax, (%rbx), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhil %eax, (%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhiq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhiq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mulxl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulxq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulxq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextl 
(%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxl $1, %eax, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxl $1, (%rax), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxl $1, (%rax), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxq $1, %rax, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxq $1, (%rax), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxq $1, (%rax), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxq %rax, %rbx, %rcx -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxq %rax, (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s index e61cc06951ae5..f3b64423348a7 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s @@ -31,8 +31,8 @@ clflushopt (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clflushopt (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clflushopt (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s index d35eadcc3f9d0..627705311c1a7 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s @@ -31,8 +31,8 @@ clwb (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clwb (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clwb (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s index 87a0e070096c2..5a3d8d0caf5ed 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s @@ -226,7 +226,7 @@ cmovgq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - - 16.00 - +# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -246,22 +246,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgw %si, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlew (%rax), %di -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbl %esi, %edi @@ -278,22 +278,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgl %esi, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - 
cmovael (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoval (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovll (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovael (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoval (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovll (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgel 
(%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbq %rsi, %rdi @@ -310,19 +310,19 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgeq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovleq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgq %rsi, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoveq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovneq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovleq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 
0.33 - - 0.50 - - - 0.33 - - cmovbq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoveq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovneq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovleq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s index 93ef7797dfe36..1e5c8ba1afbb6 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s @@ -37,11 +37,11 @@ lock cmpxchg16b (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 4.40 1.33 - +# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 1.33 4.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - cmpxchg16b (%rax) -# CHECK-NEXT: 
4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - lock cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - lock cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - lock cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - lock cmpxchg16b (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s index 7e4eeebfdaca1..37cd0c25c7f4b 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s @@ -48,14 +48,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 - 0.67 - +# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - 0.50 - - 0.50 0.50 0.50 - - - vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %ymm0, %xmm2 
diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s index 63fc8dbaa44b2..68430d3355558 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s @@ -508,199 +508,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - - 32.00 - +# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - 32.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 
0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 
- - - - - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213sd (%rax), 
%xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %ymm0, %ymm1, 
%ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231sd 
%xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), 
%ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %ymm1, %ymm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s index 322fcb420196e..45b52d8bbbfee 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s @@ -43,13 +43,13 @@ gf2p8mulb (%rax), %xmm1 # CHECK: Resource 
pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineinvqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineinvqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineinvqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8mulb %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8mulb (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8mulb (%rax), %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s index 762b6d3caef2b..9e5a084eeb9d3 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s @@ -301,142 +301,142 @@ lea 1024(%rax, %rbx, 2), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - 9.00 - - +# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - - 9.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 0, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 0, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 0, %ecx # CHECK-NEXT: - 1.00 - - - - - - 
- - - - - leaq 0, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx,2), 
%cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 
0.20 - - leaw -16, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx,2), %cx +# CHECK-NEXT: 
0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx 
-# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx 
-# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - 
leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s index 68179d34d8a82..e4136b8f321dc 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s @@ -43,13 +43,13 @@ lzcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - 
- lzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s index c62ea2963323d..27436b03bac79 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s @@ -287,112 +287,112 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 - 15.33 - +# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 15.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 5.50 - - - - 4.00 0.50 - - - - - - emms # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %mm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %mm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %mm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %mm0, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - packsswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packsswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packsswb (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packssdw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packssdw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packssdw (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packuswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packuswb 
(%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packuswb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pand %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pand (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pand (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pandn %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pandn (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pandn (%rax), %mm2 # CHECK-NEXT: 
1.00 - - - - - - - - - - - - pcmpeqb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddwd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmullw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - por %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - por (%rax), 
%mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - por (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - 
- - - - - - - psrlw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhdq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhdq (%rax), 
%mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhdq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhwd (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckldq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckldq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckldq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklwd (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pxor %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pxor (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pxor (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s index 3aa3122609563..025ea85be7206 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s @@ -43,13 +43,13 @@ movbe (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 0.20 1.00 - +# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 1.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbew %cx, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 
0.20 0.70 - - - 0.20 0.33 - movbew (%rax), %cx +# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - movbew (%rax), %cx # CHECK-NEXT: - 1.00 - - 0.50 - - 0.50 0.50 0.50 - - - movbel %ecx, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - movbel (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - movbel (%rax), %ecx # CHECK-NEXT: 0.50 1.00 - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbeq %rcx, (%rax) -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - movbeq (%rax), %rcx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - movbeq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s index 871035dbe34a6..e9a65bbe028f6 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s @@ -33,9 +33,9 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - pclmulqdq $11, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pclmulqdq $11, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pclmulqdq $11, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s index 9428d9015e212..728fa7375e1d6 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s @@ -43,13 +43,13 @@ popcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 
1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s index ff872dacaf326..257e9d54dabe4 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s @@ -33,9 +33,9 @@ prefetchw (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.67 0.67 - - - - - - - 0.67 - +# CHECK-NEXT: - - 0.67 0.67 - - - - - - 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetch (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchw (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetch (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchw (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s index b4a2252310804..774bbc5e981da 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s @@ -35,10 +35,10 @@ rdrand %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.80 1.00 - +# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.00 1.80 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandw %ax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandl %eax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandq %rax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandw %ax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandl %eax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandq %rax diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s index 8b8aff25b5163..c7ccd50384df1 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s @@ -35,10 +35,10 @@ rdseed %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - - 1.00 - +# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedw %ax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedl %eax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedq %rax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedw 
%ax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedl %eax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedq %rax diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s index b292826913d91..f747b86634702 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s @@ -336,131 +336,131 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 29.50 1.00 5.00 5.00 5.00 - 19.33 - +# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 29.50 1.00 5.00 5.00 5.00 19.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqps (%rax), 
%xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comiss (%rax), %xmm1 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvtps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvtps2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - cvtsi2ss %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2ssl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %rcx # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvttps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - 
cvttps2pi (%rax), %mm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divss (%rax), %xmm2 -# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - - 0.33 - ldmxcsr (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divss (%rax), %xmm2 +# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - 0.33 - - ldmxcsr (%rax) # CHECK-NEXT: 2.00 - - - 0.50 - - 0.50 0.50 0.50 - - - maskmovq %mm0, %mm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minss (%rax), %xmm2 # 
CHECK-NEXT: - - - - - - - - - - - - - movaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movaps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movhlps %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movlhps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhps (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskps %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntq %mm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movss %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movups (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - mulss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pextrw $1, %mm0, %ecx # CHECK-NEXT: - - - - - 2.00 - - - - - - - pinsrw $1, %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pminsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pminub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %mm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhuw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %mm2 -# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht0 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht1 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht2 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchnta (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht0 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht1 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht2 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchnta (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pshufw $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - sfence # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufps 
$1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufps $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtss (%rax), %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - stmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorps (%rax), %xmm2 diff --git 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s index 964caa1d7f73c..ff7d39ba77b3a 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s @@ -692,96 +692,96 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 107.70 99.20 39.33 39.33 8.50 71.70 1.20 7.83 7.50 7.50 0.20 39.00 - +# CHECK-NEXT: 107.70 99.20 39.33 39.33 8.50 71.70 1.20 7.83 7.50 7.50 39.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andpd (%rax), %xmm2 -# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 0.20 - - clflush (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andpd (%rax), %xmm2 +# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 - 0.20 - clflush (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqpd 
(%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comisd (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtss2sd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2sd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - 1.00 - - - - - 0.33 - cvttpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divsd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lfence # CHECK-NEXT: - - 0.33 0.33 1.00 - - 0.33 - - - - - maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxsd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - mfence # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minpd (%rax), %xmm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movapd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - movdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqu (%rax), %xmm2 # CHECK-NEXT: 0.83 0.33 - - - 0.83 - - - - - - - movdq2q %xmm0, %mm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - 
- - movmskpd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntil %eax, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntiq %rax, (%rax) @@ -789,177 +789,177 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntpd %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %xmm0, (%rax) # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - movq2dq %mm0, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movupd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - 
- - - - - packssdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packssdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packssdw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packsswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packsswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packsswb (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packuswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packuswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packuswb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
paddusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pand %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pand (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pand (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pandn %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pandn (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pandn (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - 
- pcmpeqw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrw $1, %eax, %xmm0 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrw $1, (%rax), %xmm0 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrw $1, (%rax), %xmm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - pminsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %xmm0, %ecx # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmullw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmuludq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuludq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - por %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - por (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - por (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufd $1, (%rax), %xmm2 +# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pslld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - pslld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pslldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrad $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrad %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psraw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psraw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - 
- psraw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - psrldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - 0.33 - - psubsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklbw (%rax), 
%xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckldq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckldq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckldq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklwd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pxor %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pxor (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pxor (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subsd 
(%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorpd (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s index 15baea9604c74..972741435935b 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s @@ -82,28 +82,28 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - - 3.33 - +# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - 
addsubps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - lddqu (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - lddqu (%rax), %xmm2 # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - monitor # CHECK-NEXT: - - - - - 1.00 - - - - - - - movddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movddup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsldup (%rax), %xmm2 # CHECK-NEXT: 1.75 1.75 - - - 2.75 3.75 - - - - - - mwait diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s index ffe9150cc5916..c715cbaa55962 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s @@ -269,37 +269,37 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 - 14.67 - +# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 14.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendpd $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendps $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendps $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendps $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvpd %xmm0, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - dppd $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - dpps $22, %xmm0, %xmm2 -# 
CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - dpps $22, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - insertps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - insertps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - insertps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movntdqa (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - mpsadbw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packusdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packusdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packusdw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pblendvb %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pblendvb %xmm0, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pblendw $11, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pblendw $11, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pblendw $11, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqq (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - 
pextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrd $1, %xmm0, %ecx @@ -308,64 +308,64 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrq $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - phminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - phminposuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrb $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrd $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrd $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrq $1, %rax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrq $1, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
- 0.33 - pmaxuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - 
- - - - 0.33 - pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuldq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuldq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuldq (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - pmulld %xmm0, %xmm2 -# CHECK-NEXT: 
1.00 1.00 0.33 0.33 - - - - - - - 0.33 - pmulld (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - pmulld (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - ptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - ptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - ptest (%rax), %xmm1 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundsd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s index cb5b34e9b6468..ad80bad814640 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s @@ -78,27 +78,27 @@ pcmpgtq (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - - 3.33 - +# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] 
Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32l %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32l (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32l (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32w %ax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32w (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32w (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32q %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32q (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32q (%rax), %rcx # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - pcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - pcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - pcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - pcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - 
pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pcmpgtq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pcmpgtq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pcmpgtq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s index 33ec9b0fa64d2..31e61eb637476 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s @@ -188,71 +188,71 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - - 10.67 - +# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %mm2 
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phaddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phaddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phaddsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phaddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phaddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddw (%rax), 
%xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phsubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phsubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phsubsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phsubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phsubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhrsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), 
%mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pshufb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufb (%rax), %mm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufb %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufb (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %xmm2 diff --git 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s index 74b19b968bec5..e6722b884eb95 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s @@ -48,15 +48,15 @@ vaesenclast (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - - 1.33 - +# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s index cd834d35c43d5..f474a07b7be49 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s +++ 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s @@ -33,9 +33,9 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %ymm1, %ymm3 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s index 559ae7e957a29..fd5968e319bce 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s @@ -64,7 +64,7 @@ salc # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.60 0.33 - +# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.33 0.60 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -79,5 +79,5 @@ salc # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - daa # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - das # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - into -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - leave # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - salc diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s index ec303b4d9f2df..5cad9072f37c6 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s @@ -1965,419 +1965,419 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 191.33 213.00 - +# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 213.00 191.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 
0.50 0.50 0.33 0.40 - adcb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcw (%rax), %di +# 
CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcl %esi, (%rax) -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 
0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - 
addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $665536, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 
- - - 0.33 0.20 - addb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - 
addq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - addq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $7, %di -# CHECK-NEXT: 0.20 
0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $7, (%rax) -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $511, 
(%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $7, %rdi +# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfw (%rax), %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrw (%rax), %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrl %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfl (%rax), %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrl (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfq (%rax), %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bswapl %eax # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bswapq %rax # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - 
- - - - btrw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw %si, %di -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsw %si, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsw %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw $7, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - 
lock btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsw $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl %esi, %edi -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsl %esi, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 
0.50 0.33 1.27 - btsl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsl %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl $7, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsl $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq %rsi, %rdi -# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 1.00 0.33 - 
btq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btsq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btsq %rsi, (%rax) +# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 0.33 1.00 - btq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btsq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btsq %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq $7, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrq $7, (%rax) -# CHECK-NEXT: - 
1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsq $7, (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cbtw # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cwtl # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cltq -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cwtd +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cwtd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cltd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cqto # CHECK-NEXT: - - - - - - - - - - - - - clc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cld -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmc -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - 
cmpw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq (%rax), %rdi -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsb %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsw %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsl %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgb %cl, %bl -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 
0.20 0.33 - cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgw %cx, %bx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgq %rcx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cld +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmc +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw %si, %di +# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsb %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsw %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsl %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsq %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgb %cl, %bl +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 
0.50 0.50 0.33 0.20 - lock cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgw %cx, %bx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgl %ecx, %ebx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgq %rcx, %rbx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 7.50 6.50 - - 0.50 5.00 5.00 0.50 0.50 0.50 - - - cpuid -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 
- decw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - decq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decq (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - divq (%rax) -# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 - 4.67 - enter $7, $4095 +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - divq (%rax) +# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 4.67 - - enter $7, $4095 # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - 
- idivb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - idivq (%rax) +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - idivq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - imulw %di -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - imulw (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - imulw %di +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - imulw (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulw (%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $511, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $511, (%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $7, (%rax), %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - imull %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - imull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulw (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $511, %si, 
%di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $511, (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $7, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $7, (%rax), %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - imull %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - imull (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $665536, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $665536, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $665536, (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $7, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $7, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $7, (%rax), %edi # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - imulq %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - imulq (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - imulq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $665536, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $665536, (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $665536, (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $7, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $7, (%rax), %rdi -# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 1.87 2.67 - inb $7, %al -# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 1.87 2.33 - inb %dx, %al -# CHECK-NEXT: 
21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 1.67 2.33 - inw $7, %ax -# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 1.80 2.33 - inw %dx, %ax -# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 1.87 3.33 - inl $7, %eax -# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 2.47 3.67 - inl %dx, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incl (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $7, (%rax), %rdi +# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 2.67 1.87 - inb $7, %al +# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 2.33 1.87 - inb %dx, %al +# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 2.33 1.67 - inw $7, %ax +# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 2.33 1.80 - inw %dx, %ax +# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 3.33 1.87 - inl $7, %eax +# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 3.67 2.47 - inl %dx, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 
0.50 0.33 0.20 - incw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - incq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incq (%rax) -# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 1.20 2.67 - insb %dx, %es:(%rdi) -# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 1.13 3.00 - insw %dx, %es:(%rdi) -# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 1.00 3.67 - insl %dx, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incq (%rax) +# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 2.67 1.20 - insb %dx, %es:(%rdi) +# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 3.00 1.13 - insw %dx, %es:(%rdi) +# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 3.67 1.00 - insl %dx, %es:(%rdi) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - int $7 -# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 1.47 - - invlpg (%rax) +# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 - 1.47 - invlpg (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - invlpga # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lahf -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - leave -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsb (%rsi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsw (%rsi), %ax -# CHECK-NEXT: 0.40 0.40 
0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsl (%rsi), %eax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsq (%rsi), %rax -# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - 0.40 - - loop 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loope 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loopne 0 -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsb (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsw (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsl (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsq (%rsi), %es:(%rdi) -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbw %al, %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbw %al, %di +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsb (%rsi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsw (%rsi), %ax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsl (%rsi), %eax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsq (%rsi), %rax +# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - - 0.40 - loop 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loope 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loopne 0 +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsb (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsw (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsl (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsq (%rsi), %es:(%rdi) +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbw %al, %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 
- 0.20 - movzbw %al, %di # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - movsbw (%rax), %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - movzbw (%rax), %di -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbl %al, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbl %al, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbq %al, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbq %al, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswl %ax, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwl %ax, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswq %ax, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwq %ax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movslq %eax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movslq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - movzbw (%rax), %di +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbl %al, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbl %al, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbq %al, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbq %al, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - 
movsbq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswl %ax, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwl %ax, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswq %ax, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwq %ax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movslq %eax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - mulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - mulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - mulw %si -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - mulw (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mull %edx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - mulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - mulw %si +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - mulw (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mull %edx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mull (%rax) # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulq %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulq (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negw %si -# CHECK-NEXT: 0.20 
0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negq (%r10) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulq (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negq (%r10) # CHECK-NEXT: - - - - - - - - - - - - - nop # CHECK-NEXT: - - - - - - - - - - - - - nopw %di # CHECK-NEXT: - - - - - - - - - - - - - nopw (%rcx) @@ -2385,303 +2385,303 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - - nopl (%r8) # CHECK-NEXT: - - - - - - - - - - - - - 
nopq %rdx # CHECK-NEXT: - - - - - - - - - - - - - nopq (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notq (%r10) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - 
- 0.20 - - orq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orq (%rax), %rdi -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.00 1.67 - outb %al, $7 -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.00 1.67 - outb %al, %dx -# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 0.80 2.33 - outw %ax, $7 -# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 1.20 2.33 - outw %ax, %dx -# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 0.80 3.00 - outl %eax, $7 -# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 1.20 3.00 - outl %eax, %dx -# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 1.20 2.33 - outsb (%rsi), %dx -# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 1.00 2.67 - outsw (%rsi), %dx -# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 1.20 3.33 - outsl (%rsi), %dx +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notl %edx +# CHECK-NEXT: 
0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notq (%r10) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 
0.20 - orw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orq (%rax), %rdi +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.67 
1.00 - outb %al, $7 +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.67 1.00 - outb %al, %dx +# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 2.33 0.80 - outw %ax, $7 +# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 2.33 1.20 - outw %ax, %dx +# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 3.00 0.80 - outl %eax, $7 +# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 3.00 1.20 - outl %eax, %dx +# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 2.33 1.20 - outsb (%rsi), %dx +# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 2.67 1.00 - outsw (%rsi), %dx +# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 3.33 1.20 - outsl (%rsi), %dx # CHECK-NEXT: 0.50 - - - - 1.00 0.50 - - - - - - pause -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb $7, %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb $7, %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb $7, (%rax) -# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - 0.40 - - rclb %cl, %dil -# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - 0.60 - - rcrb %cl, %dil -# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.20 0.33 - rclb %cl, (%rax) -# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.40 0.33 - rcrb %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 
0.50 0.50 0.20 0.33 - rclw (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw $7, %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw $7, %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclw %cl, %di -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrw %cl, %di -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclw %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrw %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll $7, %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl $7, %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcll %cl, %edi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrl %cl, %edi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcll %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrl %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 
0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq $7, %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq $7, %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclq %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrq %cl, (%rax) -# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - 0.33 - - rdmsr -# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - 1.80 - - rdpmc -# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - 1.00 - - rdtsc +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb $7, %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb $7, %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb $7, (%rax) +# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - - 0.40 - rclb %cl, %dil +# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - - 0.60 - rcrb %cl, %dil +# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.33 0.20 - rclb %cl, (%rax) +# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.33 0.40 - rcrb %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw %di +# 
CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw $7, %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw $7, %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclw %cl, %di +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrw %cl, %di +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclw %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrw %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll $7, %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl $7, %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcll %cl, %edi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrl %cl, %edi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcll %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrl %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 
- rclq (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq $7, %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq $7, %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclq %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrq %cl, (%rax) +# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - - 0.33 - rdmsr +# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - - 1.80 - rdpmc +# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - - 1.00 - rdtsc # CHECK-NEXT: 7.50 5.33 - - - 4.00 4.17 - - - - - - rdtscp # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb $7, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb $7, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - 
- - rorb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw $7, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw $7, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %cl, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl (%rax) +# CHECK-NEXT: 1.00 - 
0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll $7, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl $7, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq $7, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq $7, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %cl, 
%rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sahf # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb $7, %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 
0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrb %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw $7, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrw %cl, %di -# 
CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrw %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl $7, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarl %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - 
- - - - shrl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarl %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrl %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq $7, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlq %cl, %rdi # 
CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrq %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbb %sil, (%rax) +# 
CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 
0.50 0.50 0.50 0.33 0.40 - sbbw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 
0.33 - sbbl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 
0.33 - lock sbbq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbq (%rax), %rdi -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasb %es:(%rdi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasw %es:(%rdi), %ax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasl %es:(%rdi), %eax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasq %es:(%rdi), %rax +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasb %es:(%rdi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasw %es:(%rdi), %ax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasl %es:(%rdi), %eax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasq %es:(%rdi), %rax # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - seto %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - seto (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setno %al @@ -2714,171 +2714,171 @@ xorq (%rax), %rdi # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setg (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setle %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setle (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldw %cl, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdw %cl, %si, %di 
+# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdw %cl, %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldw $7, %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldw $7, %si, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdw $7, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldl %cl, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdl %cl, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldw $7, %si, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdw $7, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldl %cl, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdl %cl, %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldl $7, %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdl $7, %esi, %edi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldl $7, %esi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdl $7, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - 
shldq %cl, %rsi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldl $7, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdl $7, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdq %cl, %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldq $7, %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdq $7, %rsi, %rdi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - stc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - std -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosb %al, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosw %ax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosl %eax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosq %rax, %es:(%rdi) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb %sil, (%rax) -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl %esi, 
(%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $665536, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - stc +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - std +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosb %al, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosw %ax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosl %eax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosq %rax, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 
0.50 0.50 0.33 0.20 - subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 
0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - subq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 
- - - 0.20 - - testl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 
0.20 0.20 - - - - 0.20 - testw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq %rsi, (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - ud2 # CHECK-NEXT: 52.00 31.50 - - 0.50 27.00 31.50 0.50 0.50 0.50 - - - wrmsr -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddb %bl, %cl -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddb %bl, (%rcx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddb %bl, (%rcx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddw %bx, %cx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddw %ax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddl %ebx, %ecx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 
0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddl %eax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddq %rbx, %rcx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddq %rax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgb %bl, %cl -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgb %bl, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgb %bl, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %ax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %cx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgw %ax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %eax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %ecx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgl %eax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rcx -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - xchgq %rax, (%rbx) -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - lock xchgq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - xlatb -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 
0.33 - xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 
0.33 - lock xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddb %bl, %cl +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddb %bl, (%rcx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddb %bl, (%rcx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddw %bx, %cx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddw %ax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddl %ebx, %ecx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 
0.50 0.50 0.33 0.40 - xaddl %eax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddq %rbx, %rcx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddq %rax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgb %bl, %cl +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgb %bl, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgb %bl, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %ax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %cx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgw %ax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %eax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %ecx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgl %eax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rcx +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - xchgq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - lock xchgq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - xlatb +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb $7, 
(%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl 
$7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s index 5947c582df4b3..e50ec3c04ae32 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s @@ -372,7 +372,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 131.25 62.75 49.67 49.67 46.00 159.25 74.75 28.00 19.00 19.00 1.00 0.67 7.00 +# CHECK-NEXT: 131.25 62.75 49.67 49.67 46.00 159.25 74.75 28.00 19.00 19.00 0.67 1.00 7.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] 
[1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -520,7 +520,7 @@ fyl2xp1 # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(1) # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(3) # CHECK-NEXT: 17.25 12.25 16.50 16.50 - 12.75 14.75 - - - - - - fxrstor (%eax) -# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 1.00 0.67 - fxsave (%eax) +# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 0.67 1.00 - fxsave (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fxtract # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2x # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2xp1 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s index 1d1104d487a2d..0b3fd683d357a 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s @@ -43,12 +43,12 @@ xsetbv # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 72.00 71.67 2.33 2.33 0.50 53.83 67.17 0.50 0.50 0.50 6.33 1.33 - +# CHECK-NEXT: 72.00 71.67 2.33 2.33 0.50 53.83 67.17 0.50 0.50 0.50 1.33 6.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - 1.40 - - xgetbv +# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - - 1.40 - xgetbv # CHECK-NEXT: 5.25 6.25 0.50 0.50 - 5.25 13.25 - - - - - - xrstor (%rax) # CHECK-NEXT: 5.25 6.25 0.50 0.50 - 5.25 13.25 - - - - - - xrstors (%rax) -# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 - 1.33 - xsave (%rax) -# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - 4.93 - - xsetbv +# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 1.33 - - xsave (%rax) +# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - - 4.93 - xsetbv diff --git 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s index 90fea632be66b..66f647b1978f0 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s @@ -227,14 +227,14 @@ vpxor %ymm3, %ymm3, %ymm5 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 27.00 26.00 - - - 27.00 1.00 - - - 2.00 - - +# CHECK-NEXT: 27.00 26.00 - - - 27.00 1.00 - - - - 2.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - - - - - - - - - 1.00 - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - subl %eax, %eax # CHECK-NEXT: - - - - - - 1.00 - - - - - - subq %rax, %rax # CHECK-NEXT: - - - - - 1.00 - - - - - - - xorl %eax, %eax -# CHECK-NEXT: - - - - - - - - - - 1.00 - - xorq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - xorq %rax, %rax # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm2, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s index df0053a1dcb9b..25f79397fa071 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure 
per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index 1b196b4355a6d..028625013a85c 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1115,9 +1115,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 2 11 1.00 * vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 9 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 9 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx # 
CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 424.25 3.25 12.67 +# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 421.25 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1825,9 +1825,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 @@ -1847,7 +1847,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s index 9fcd03bfb2fd4..07870d92dac55 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s @@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 4.00 - 0.67 +# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 4.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 - - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s index e76d90521afa9..8851be4679a1e 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s @@ -423,7 +423,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2 @@ -433,7 +433,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2ps 
%xmm0, %xmm2 # CHECK-NEXT: 3 9 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67 +# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -709,7 +709,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 @@ -719,7 +719,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s index 49db25cb0bdfb..7f07fd56fe60d 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx @@ -1152,7 +1152,7 @@ vzeroupper # CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx @@ -1734,7 +1734,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50 +# CHECK-NEXT: - 572.00 248.50 319.00 39.00 371.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1845,7 +1845,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx @@ -1860,7 +1860,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 
1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index 8736c1c6234af..9ca3b9a7b4608 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -636,6 +636,66 @@ vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} vpgatherqd (%rax,%zmm1,2), %ymm2 {k1} +vpmovdb %zmm19, %xmm16 +vpmovdb %zmm19, (%rax) +vpmovdb %zmm19, %xmm16 {k1} +vpmovdb %zmm19, (%rax) {k1} +vpmovdb %zmm19, %xmm16 {k1}{z} + +vpmovdw %zmm19, %ymm16 +vpmovdw %zmm19, (%rax) +vpmovdw %zmm19, %ymm16 {k1} +vpmovdw %zmm19, (%rax) {k1} +vpmovdw %zmm19, %ymm16 {k1}{z} + +vpmovqb %zmm19, %xmm16 +vpmovqb %zmm19, (%rax) +vpmovqb %zmm19, %xmm16 {k1} +vpmovqb %zmm19, (%rax) {k1} +vpmovqb %zmm19, %xmm16 {k1}{z} + +vpmovqd %zmm19, %ymm16 +vpmovqd %zmm19, (%rax) +vpmovqd %zmm19, %ymm16 {k1} +vpmovqd %zmm19, (%rax) {k1} +vpmovqd %zmm19, %ymm16 {k1}{z} + +vpmovqw %zmm19, %xmm16 +vpmovqw %zmm19, (%rax) +vpmovqw %zmm19, %xmm16 {k1} +vpmovqw %zmm19, (%rax) {k1} +vpmovqw %zmm19, %xmm16 {k1}{z} + +vpmovsdb %zmm19, %xmm16 +vpmovsdb %zmm19, (%rax) +vpmovsdb %zmm19, %xmm16 {k1} +vpmovsdb %zmm19, (%rax) {k1} +vpmovsdb %zmm19, %xmm16 {k1}{z} + +vpmovsdw %zmm19, %ymm16 +vpmovsdw %zmm19, (%rax) +vpmovsdw %zmm19, %ymm16 {k1} +vpmovsdw %zmm19, (%rax) {k1} +vpmovsdw %zmm19, %ymm16 {k1}{z} + +vpmovsqb %zmm19, %xmm16 +vpmovsqb %zmm19, (%rax) +vpmovsqb %zmm19, %xmm16 {k1} +vpmovsqb %zmm19, (%rax) {k1} +vpmovsqb %zmm19, %xmm16 {k1}{z} 
+ +vpmovsqd %zmm19, %ymm16 +vpmovsqd %zmm19, (%rax) +vpmovsqd %zmm19, %ymm16 {k1} +vpmovsqd %zmm19, (%rax) {k1} +vpmovsqd %zmm19, %ymm16 {k1}{z} + +vpmovsqw %zmm19, %xmm16 +vpmovsqw %zmm19, (%rax) +vpmovsqw %zmm19, %xmm16 {k1} +vpmovsqw %zmm19, (%rax) {k1} +vpmovsqw %zmm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %zmm19 vpmovsxbd (%rax), %zmm19 vpmovsxbd %xmm16, %zmm19 {k1} @@ -671,6 +731,36 @@ vpmovsxwq (%rax), %zmm19 {k1} vpmovsxwq %xmm16, %zmm19 {z}{k1} vpmovsxwq (%rax), %zmm19 {z}{k1} +vpmovusdb %zmm19, %xmm16 +vpmovusdb %zmm19, (%rax) +vpmovusdb %zmm19, %xmm16 {k1} +vpmovusdb %zmm19, (%rax) {k1} +vpmovusdb %zmm19, %xmm16 {k1}{z} + +vpmovusdw %zmm19, %ymm16 +vpmovusdw %zmm19, (%rax) +vpmovusdw %zmm19, %ymm16 {k1} +vpmovusdw %zmm19, (%rax) {k1} +vpmovusdw %zmm19, %ymm16 {k1}{z} + +vpmovusqb %zmm19, %xmm16 +vpmovusqb %zmm19, (%rax) +vpmovusqb %zmm19, %xmm16 {k1} +vpmovusqb %zmm19, (%rax) {k1} +vpmovusqb %zmm19, %xmm16 {k1}{z} + +vpmovusqd %zmm19, %ymm16 +vpmovusqd %zmm19, (%rax) +vpmovusqd %zmm19, %ymm16 {k1} +vpmovusqd %zmm19, (%rax) {k1} +vpmovusqd %zmm19, %ymm16 {k1}{z} + +vpmovusqw %zmm19, %xmm16 +vpmovusqw %zmm19, (%rax) +vpmovusqw %zmm19, %xmm16 {k1} +vpmovusqw %zmm19, (%rax) {k1} +vpmovusqw %zmm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %zmm19 vpmovzxbd (%rax), %zmm19 vpmovzxbd %xmm16, %zmm19 {k1} @@ -1263,14 +1353,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd 
(%rax), %zmm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 2 5 1.00 vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2usi %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2usi (%rax), %ecx @@ -1646,6 +1736,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 5 0.50 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovdb %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: 2 8 1.00 * vpmovdw %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqb %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqd %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqw %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqw %zmm19, %xmm16 {%k1} +# 
CHECK-NEXT: 2 8 1.00 * vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vpmovsxbd %xmm16, %zmm19 {%k1} @@ -1676,6 +1816,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusdb %zmm19, %xmm16 {%k1} +# 
CHECK-NEXT: 2 8 1.00 * vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vpmovzxbd %xmm16, %zmm19 {%k1} @@ -2053,7 +2218,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 523.00 304.50 304.50 +# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 604.00 319.50 319.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2230,14 +2395,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - - 
1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2usi %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2usi (%rax), %ecx @@ -2613,6 +2778,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdw %zmm19, (%rax) 
+# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %zmm19, %xmm16 {%k1} +# 
CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vpmovsxbd %xmm16, %zmm19 {%k1} @@ -2643,6 +2858,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 
0.50 0.50 vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vpmovzxbd %xmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s index f028580f1a4ba..2cdd65ab43f10 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s @@ -373,16 +373,19 @@ vpmovswb %zmm16, %ymm19 vpmovswb %zmm16, (%rax) vpmovswb %zmm16, %ymm19 {k1} vpmovswb %zmm16, (%rax) {k1} +vpmovswb %zmm16, %ymm19 {z}{k1} vpmovuswb %zmm16, %ymm19 vpmovuswb %zmm16, (%rax) vpmovuswb %zmm16, %ymm19 {k1} vpmovuswb %zmm16, (%rax) {k1} +vpmovuswb %zmm16, %ymm19 {z}{k1} vpmovwb %zmm16, %ymm19 vpmovwb %zmm16, (%rax) vpmovwb %zmm16, %ymm19 {k1} vpmovwb %zmm16, (%rax) {k1} +vpmovwb %zmm16, %ymm19 {z}{k1} vpmovzxbw %ymm16, %zmm19 vpmovzxbw (%rax), %zmm19 @@ -919,14 +922,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vpmovswb %zmm16, (%rax) # 
CHECK-NEXT: 1 1 1.00 vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: 2 8 1.00 * vpmovuswb %zmm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovwb %zmm16, %ymm19 # CHECK-NEXT: 2 8 1.00 * vpmovwb %zmm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vpmovzxbw %ymm16, %zmm19 {%k1} @@ -1124,7 +1130,7 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 118.00 130.00 8.00 272.00 117.00 117.00 +# CHECK-NEXT: - - 118.00 130.00 8.00 275.00 117.00 117.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1436,14 +1442,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovswb %zmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovuswb %zmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovwb %zmm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovwb %zmm16, (%rax) # 
CHECK-NEXT: - - - - - 1.00 - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vpmovzxbw %ymm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s index 946335bf7c980..35ca69ea7aa72 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s @@ -621,31 +621,37 @@ vpmovswb %xmm16, %xmm19 vpmovswb %xmm16, (%rax) vpmovswb %xmm16, %xmm19 {k1} vpmovswb %xmm16, (%rax) {k1} +vpmovswb %xmm16, %xmm19 {z}{k1} vpmovswb %ymm16, %xmm19 vpmovswb %ymm16, (%rax) vpmovswb %ymm16, %xmm19 {k1} vpmovswb %ymm16, (%rax) {k1} +vpmovswb %ymm16, %xmm19 {z}{k1} vpmovuswb %xmm16, %xmm19 vpmovuswb %xmm16, (%rax) vpmovuswb %xmm16, %xmm19 {k1} vpmovuswb %xmm16, (%rax) {k1} +vpmovuswb %xmm16, %xmm19 {z}{k1} vpmovuswb %ymm16, %xmm19 vpmovuswb %ymm16, (%rax) vpmovuswb %ymm16, %xmm19 {k1} vpmovuswb %ymm16, (%rax) {k1} +vpmovuswb %ymm16, %xmm19 {z}{k1} vpmovwb %xmm16, %xmm19 vpmovwb %xmm16, (%rax) vpmovwb %xmm16, %xmm19 {k1} vpmovwb %xmm16, (%rax) {k1} +vpmovwb %xmm16, %xmm19 {z}{k1} vpmovwb %ymm16, %xmm19 vpmovwb %ymm16, (%rax) vpmovwb %ymm16, %xmm19 {k1} vpmovwb %ymm16, (%rax) {k1} +vpmovwb %ymm16, %xmm19 {z}{k1} vpmovzxbw %xmm16, %xmm19 vpmovzxbw (%rax), %xmm19 @@ -1620,26 +1626,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vpmovswb %xmm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovswb %ymm16, %xmm19 # CHECK-NEXT: 2 8 1.00 * vpmovswb %ymm16, (%rax) # CHECK-NEXT: 1 1 1.00 
vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: 2 8 1.00 * vpmovuswb %xmm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: 2 8 1.00 * vpmovuswb %ymm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovwb %xmm16, %xmm19 # CHECK-NEXT: 2 8 1.00 * vpmovwb %xmm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovwb %ymm16, %xmm19 # CHECK-NEXT: 2 8 1.00 * vpmovwb %ymm16, (%rax) # CHECK-NEXT: 1 1 1.00 vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm16, %xmm19 {%k1} @@ -2021,7 +2033,7 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 214.67 282.67 8.00 458.67 226.00 226.00 +# CHECK-NEXT: - - 214.67 282.67 8.00 464.67 226.00 226.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2553,26 +2565,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovswb %xmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 
- - - - - 1.00 - - vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovswb %ymm16, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovswb %ymm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovuswb %xmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovuswb %ymm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovwb %xmm16, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovwb %xmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vpmovwb %ymm16, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovwb %ymm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbw %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 
8bf3c21891f7f..6bf257299b9f7 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -1187,6 +1187,126 @@ vpgatherdd (%rax,%xmm1,2), %xmm2 {k1} vpgatherqq (%rax,%xmm1,2), %xmm2 {k1} vpgatherqd (%rax,%xmm1,2), %xmm2 {k1} +vpmovdb %xmm19, %xmm16 +vpmovdb %xmm19, (%rax) +vpmovdb %xmm19, %xmm16 {k1} +vpmovdb %xmm19, (%rax) {k1} +vpmovdb %xmm19, %xmm16 {k1}{z} + +vpmovdb %ymm19, %xmm16 +vpmovdb %ymm19, (%rax) +vpmovdb %ymm19, %xmm16 {k1} +vpmovdb %ymm19, (%rax) {k1} +vpmovdb %ymm19, %xmm16 {k1}{z} + +vpmovdw %xmm19, %xmm16 +vpmovdw %xmm19, (%rax) +vpmovdw %xmm19, %xmm16 {k1} +vpmovdw %xmm19, (%rax) {k1} +vpmovdw %xmm19, %xmm16 {k1}{z} + +vpmovdw %ymm19, %xmm16 +vpmovdw %ymm19, (%rax) +vpmovdw %ymm19, %xmm16 {k1} +vpmovdw %ymm19, (%rax) {k1} +vpmovdw %ymm19, %xmm16 {k1}{z} + +vpmovqb %xmm19, %xmm16 +vpmovqb %xmm19, (%rax) +vpmovqb %xmm19, %xmm16 {k1} +vpmovqb %xmm19, (%rax) {k1} +vpmovqb %xmm19, %xmm16 {k1}{z} + +vpmovqb %ymm19, %xmm16 +vpmovqb %ymm19, (%rax) +vpmovqb %ymm19, %xmm16 {k1} +vpmovqb %ymm19, (%rax) {k1} +vpmovqb %ymm19, %xmm16 {k1}{z} + +vpmovqd %xmm19, %xmm16 +vpmovqd %xmm19, (%rax) +vpmovqd %xmm19, %xmm16 {k1} +vpmovqd %xmm19, (%rax) {k1} +vpmovqd %xmm19, %xmm16 {k1}{z} + +vpmovqd %ymm19, %xmm16 +vpmovqd %ymm19, (%rax) +vpmovqd %ymm19, %xmm16 {k1} +vpmovqd %ymm19, (%rax) {k1} +vpmovqd %ymm19, %xmm16 {k1}{z} + +vpmovqw %xmm19, %xmm16 +vpmovqw %xmm19, (%rax) +vpmovqw %xmm19, %xmm16 {k1} +vpmovqw %xmm19, (%rax) {k1} +vpmovqw %xmm19, %xmm16 {k1}{z} + +vpmovqw %ymm19, %xmm16 +vpmovqw %ymm19, (%rax) +vpmovqw %ymm19, %xmm16 {k1} +vpmovqw %ymm19, (%rax) {k1} +vpmovqw %ymm19, %xmm16 {k1}{z} + +vpmovsdb %xmm19, %xmm16 +vpmovsdb %xmm19, (%rax) +vpmovsdb %xmm19, %xmm16 {k1} +vpmovsdb %xmm19, (%rax) {k1} +vpmovsdb %xmm19, %xmm16 {k1}{z} + +vpmovsdb %ymm19, %xmm16 +vpmovsdb %ymm19, (%rax) +vpmovsdb %ymm19, %xmm16 {k1} +vpmovsdb %ymm19, (%rax) {k1} +vpmovsdb %ymm19, %xmm16 {k1}{z} + 
+vpmovsdw %xmm19, %xmm16 +vpmovsdw %xmm19, (%rax) +vpmovsdw %xmm19, %xmm16 {k1} +vpmovsdw %xmm19, (%rax) {k1} +vpmovsdw %xmm19, %xmm16 {k1}{z} + +vpmovsdw %ymm19, %xmm16 +vpmovsdw %ymm19, (%rax) +vpmovsdw %ymm19, %xmm16 {k1} +vpmovsdw %ymm19, (%rax) {k1} +vpmovsdw %ymm19, %xmm16 {k1}{z} + +vpmovsqb %xmm19, %xmm16 +vpmovsqb %xmm19, (%rax) +vpmovsqb %xmm19, %xmm16 {k1} +vpmovsqb %xmm19, (%rax) {k1} +vpmovsqb %xmm19, %xmm16 {k1}{z} + +vpmovsqb %ymm19, %xmm16 +vpmovsqb %ymm19, (%rax) +vpmovsqb %ymm19, %xmm16 {k1} +vpmovsqb %ymm19, (%rax) {k1} +vpmovsqb %ymm19, %xmm16 {k1}{z} + +vpmovsqd %xmm19, %xmm16 +vpmovsqd %xmm19, (%rax) +vpmovsqd %xmm19, %xmm16 {k1} +vpmovsqd %xmm19, (%rax) {k1} +vpmovsqd %xmm19, %xmm16 {k1}{z} + +vpmovsqd %ymm19, %xmm16 +vpmovsqd %ymm19, (%rax) +vpmovsqd %ymm19, %xmm16 {k1} +vpmovsqd %ymm19, (%rax) {k1} +vpmovsqd %ymm19, %xmm16 {k1}{z} + +vpmovsqw %xmm19, %xmm16 +vpmovsqw %xmm19, (%rax) +vpmovsqw %xmm19, %xmm16 {k1} +vpmovsqw %xmm19, (%rax) {k1} +vpmovsqw %xmm19, %xmm16 {k1}{z} + +vpmovsqw %ymm19, %xmm16 +vpmovsqw %ymm19, (%rax) +vpmovsqw %ymm19, %xmm16 {k1} +vpmovsqw %ymm19, (%rax) {k1} +vpmovsqw %ymm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %xmm19 vpmovsxbd (%rax), %xmm19 vpmovsxbd %xmm16, %xmm19 {k1} @@ -1257,6 +1377,66 @@ vpmovsxwq (%rax), %ymm19 {k1} vpmovsxwq %xmm16, %ymm19 {z}{k1} vpmovsxwq (%rax), %ymm19 {z}{k1} +vpmovusdb %xmm19, %xmm16 +vpmovusdb %xmm19, (%rax) +vpmovusdb %xmm19, %xmm16 {k1} +vpmovusdb %xmm19, (%rax) {k1} +vpmovusdb %xmm19, %xmm16 {k1}{z} + +vpmovusdb %ymm19, %xmm16 +vpmovusdb %ymm19, (%rax) +vpmovusdb %ymm19, %xmm16 {k1} +vpmovusdb %ymm19, (%rax) {k1} +vpmovusdb %ymm19, %xmm16 {k1}{z} + +vpmovusdw %xmm19, %xmm16 +vpmovusdw %xmm19, (%rax) +vpmovusdw %xmm19, %xmm16 {k1} +vpmovusdw %xmm19, (%rax) {k1} +vpmovusdw %xmm19, %xmm16 {k1}{z} + +vpmovusdw %ymm19, %xmm16 +vpmovusdw %ymm19, (%rax) +vpmovusdw %ymm19, %xmm16 {k1} +vpmovusdw %ymm19, (%rax) {k1} +vpmovusdw %ymm19, %xmm16 {k1}{z} + +vpmovusqb %xmm19, %xmm16 +vpmovusqb 
%xmm19, (%rax) +vpmovusqb %xmm19, %xmm16 {k1} +vpmovusqb %xmm19, (%rax) {k1} +vpmovusqb %xmm19, %xmm16 {k1}{z} + +vpmovusqb %ymm19, %xmm16 +vpmovusqb %ymm19, (%rax) +vpmovusqb %ymm19, %xmm16 {k1} +vpmovusqb %ymm19, (%rax) {k1} +vpmovusqb %ymm19, %xmm16 {k1}{z} + +vpmovusqd %xmm19, %xmm16 +vpmovusqd %xmm19, (%rax) +vpmovusqd %xmm19, %xmm16 {k1} +vpmovusqd %xmm19, (%rax) {k1} +vpmovusqd %xmm19, %xmm16 {k1}{z} + +vpmovusqd %ymm19, %xmm16 +vpmovusqd %ymm19, (%rax) +vpmovusqd %ymm19, %xmm16 {k1} +vpmovusqd %ymm19, (%rax) {k1} +vpmovusqd %ymm19, %xmm16 {k1}{z} + +vpmovusqw %xmm19, %xmm16 +vpmovusqw %xmm19, (%rax) +vpmovusqw %xmm19, %xmm16 {k1} +vpmovusqw %xmm19, (%rax) {k1} +vpmovusqw %xmm19, %xmm16 {k1}{z} + +vpmovusqw %ymm19, %xmm16 +vpmovusqw %ymm19, (%rax) +vpmovusqw %ymm19, %xmm16 {k1} +vpmovusqw %ymm19, (%rax) {k1} +vpmovusqw %ymm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %xmm19 vpmovzxbd (%rax), %xmm19 vpmovzxbd %xmm16, %xmm19 {k1} @@ -1970,14 +2150,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, 
%ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 # CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 # CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 @@ -2784,6 +2964,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 5 0.50 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovdb %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovdb %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovdw %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovdw %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqb %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqb %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqb %ymm19, %xmm16 {%k1} 
{z} +# CHECK-NEXT: 1 1 1.00 vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqd %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqd %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqw %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovqw %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsdw %ymm19, %xmm16 {%k1} 
+# CHECK-NEXT: 2 8 1.00 * vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm16, %xmm19 {%k1} @@ -2844,6 +3124,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} # 
CHECK-NEXT: 1 1 1.00 vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqd %xmm19, 
%xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: 2 8 1.00 * vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: 1 1 1.00 vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 2 8 1.00 * vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm16, %xmm19 {%k1} @@ -3269,7 +3599,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 738.50 495.50 495.50 +# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 894.50 525.50 525.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -3511,14 +3841,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, 
%ymm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 @@ -4325,6 +4655,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw 
%xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqw %xmm19, (%rax) 
{%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %xmm19, 
%xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovsxbd %xmm16, %xmm19 {%k1} @@ -4385,6 +4815,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 
1.00 - - vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 
vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpmovzxbd %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s index 7dea75f8f8fec..4abcd6fc516b7 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s @@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] 
[3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00 +# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, 
%xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s index df0053a1dcb9b..25f79397fa071 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 05c476079c0f9..179393abb08d4 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1115,9 +1115,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 12 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 2 12 1.00 * vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 10 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 10 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 427.58 3.25 12.67 +# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 424.58 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1825,9 +1825,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - 
1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 @@ -1847,7 +1847,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s index 538ecf99074ed..d1fb824fee23d 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s @@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67 +# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 6.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 
1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s index 907db6f44a9e2..37a28a66fd350 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s @@ -209,7 +209,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comiss (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s index 3b4aeb37968fd..c9c3e20eeaded 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s @@ -423,7 +423,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2 @@ -433,7 +433,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2 @@ -689,7 
+689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67 +# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -709,7 +709,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 @@ -719,7 +719,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index 8b495d6ee268e..a5c47bec8daa2 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -636,6 +636,66 @@ vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} vpgatherqd (%rax,%zmm1,2), %ymm2 {k1} +vpmovdb %zmm19, %xmm16 +vpmovdb %zmm19, (%rax) +vpmovdb %zmm19, 
%xmm16 {k1} +vpmovdb %zmm19, (%rax) {k1} +vpmovdb %zmm19, %xmm16 {k1}{z} + +vpmovdw %zmm19, %ymm16 +vpmovdw %zmm19, (%rax) +vpmovdw %zmm19, %ymm16 {k1} +vpmovdw %zmm19, (%rax) {k1} +vpmovdw %zmm19, %ymm16 {k1}{z} + +vpmovqb %zmm19, %xmm16 +vpmovqb %zmm19, (%rax) +vpmovqb %zmm19, %xmm16 {k1} +vpmovqb %zmm19, (%rax) {k1} +vpmovqb %zmm19, %xmm16 {k1}{z} + +vpmovqd %zmm19, %ymm16 +vpmovqd %zmm19, (%rax) +vpmovqd %zmm19, %ymm16 {k1} +vpmovqd %zmm19, (%rax) {k1} +vpmovqd %zmm19, %ymm16 {k1}{z} + +vpmovqw %zmm19, %xmm16 +vpmovqw %zmm19, (%rax) +vpmovqw %zmm19, %xmm16 {k1} +vpmovqw %zmm19, (%rax) {k1} +vpmovqw %zmm19, %xmm16 {k1}{z} + +vpmovsdb %zmm19, %xmm16 +vpmovsdb %zmm19, (%rax) +vpmovsdb %zmm19, %xmm16 {k1} +vpmovsdb %zmm19, (%rax) {k1} +vpmovsdb %zmm19, %xmm16 {k1}{z} + +vpmovsdw %zmm19, %ymm16 +vpmovsdw %zmm19, (%rax) +vpmovsdw %zmm19, %ymm16 {k1} +vpmovsdw %zmm19, (%rax) {k1} +vpmovsdw %zmm19, %ymm16 {k1}{z} + +vpmovsqb %zmm19, %xmm16 +vpmovsqb %zmm19, (%rax) +vpmovsqb %zmm19, %xmm16 {k1} +vpmovsqb %zmm19, (%rax) {k1} +vpmovsqb %zmm19, %xmm16 {k1}{z} + +vpmovsqd %zmm19, %ymm16 +vpmovsqd %zmm19, (%rax) +vpmovsqd %zmm19, %ymm16 {k1} +vpmovsqd %zmm19, (%rax) {k1} +vpmovsqd %zmm19, %ymm16 {k1}{z} + +vpmovsqw %zmm19, %xmm16 +vpmovsqw %zmm19, (%rax) +vpmovsqw %zmm19, %xmm16 {k1} +vpmovsqw %zmm19, (%rax) {k1} +vpmovsqw %zmm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %zmm19 vpmovsxbd (%rax), %zmm19 vpmovsxbd %xmm16, %zmm19 {k1} @@ -671,6 +731,36 @@ vpmovsxwq (%rax), %zmm19 {k1} vpmovsxwq %xmm16, %zmm19 {z}{k1} vpmovsxwq (%rax), %zmm19 {z}{k1} +vpmovusdb %zmm19, %xmm16 +vpmovusdb %zmm19, (%rax) +vpmovusdb %zmm19, %xmm16 {k1} +vpmovusdb %zmm19, (%rax) {k1} +vpmovusdb %zmm19, %xmm16 {k1}{z} + +vpmovusdw %zmm19, %ymm16 +vpmovusdw %zmm19, (%rax) +vpmovusdw %zmm19, %ymm16 {k1} +vpmovusdw %zmm19, (%rax) {k1} +vpmovusdw %zmm19, %ymm16 {k1}{z} + +vpmovusqb %zmm19, %xmm16 +vpmovusqb %zmm19, (%rax) +vpmovusqb %zmm19, %xmm16 {k1} +vpmovusqb %zmm19, (%rax) {k1} +vpmovusqb %zmm19, %xmm16 
{k1}{z} + +vpmovusqd %zmm19, %ymm16 +vpmovusqd %zmm19, (%rax) +vpmovusqd %zmm19, %ymm16 {k1} +vpmovusqd %zmm19, (%rax) {k1} +vpmovusqd %zmm19, %ymm16 {k1}{z} + +vpmovusqw %zmm19, %xmm16 +vpmovusqw %zmm19, (%rax) +vpmovusqw %zmm19, %xmm16 {k1} +vpmovusqw %zmm19, (%rax) {k1} +vpmovusqw %zmm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %zmm19 vpmovzxbd (%rax), %zmm19 vpmovzxbd %xmm16, %zmm19 {k1} @@ -1646,6 +1736,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 5 25 8.00 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: 3 4 1.00 * vpmovqd %zmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 3 4 1.00 * vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb 
%zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovsxbd %xmm16, %zmm19 {%k1} @@ -1676,6 +1816,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 3 1.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 
vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovzxbd %xmm16, %zmm19 {%k1} @@ -2057,7 +2222,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 411.50 104.00 328.00 328.00 48.50 593.50 6.00 48.50 48.50 48.50 +# CHECK-NEXT: - 612.00 411.50 104.00 328.00 328.00 63.50 735.50 6.00 63.50 63.50 63.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2617,6 +2782,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - - - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - - - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - 
- - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpmovqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 
2.00 - 0.50 0.50 0.50 vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovsxbd %xmm16, %zmm19 {%k1} @@ 
-2647,6 +2862,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 
- - 1.00 - - - - vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovzxbd %xmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s index 3bcc961c70e2a..ad81aeae31e4c 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s @@ -373,16 +373,19 @@ vpmovswb %zmm16, %ymm19 vpmovswb %zmm16, (%rax) vpmovswb %zmm16, %ymm19 {k1} vpmovswb %zmm16, (%rax) {k1} +vpmovswb %zmm16, %ymm19 {z}{k1} vpmovuswb %zmm16, %ymm19 vpmovuswb %zmm16, (%rax) vpmovuswb %zmm16, %ymm19 {k1} vpmovuswb %zmm16, (%rax) {k1} +vpmovuswb %zmm16, %ymm19 {z}{k1} vpmovwb %zmm16, %ymm19 vpmovwb %zmm16, (%rax) vpmovwb %zmm16, %ymm19 {k1} vpmovwb %zmm16, (%rax) {k1} +vpmovwb %zmm16, %ymm19 {z}{k1} vpmovzxbw %ymm16, %zmm19 vpmovzxbw (%rax), %zmm19 @@ -919,14 +922,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %zmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: 4 5 2.00 * vpmovuswb %zmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %zmm16, %ymm19 # CHECK-NEXT: 4 5 2.00 * vpmovwb %zmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 
2.00 vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovzxbw %ymm16, %zmm19 {%k1} @@ -1128,7 +1134,7 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 239.50 19.50 110.00 110.00 8.00 293.50 0.50 8.00 8.00 8.00 +# CHECK-NEXT: - - 239.50 19.50 110.00 110.00 8.00 299.50 0.50 8.00 8.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1440,14 +1446,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovswb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovuswb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovwb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovzxbw %ymm16, %zmm19 {%k1} diff --git 
a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s index ee095f0d3bfc1..cec13a1096d52 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s @@ -621,31 +621,37 @@ vpmovswb %xmm16, %xmm19 vpmovswb %xmm16, (%rax) vpmovswb %xmm16, %xmm19 {k1} vpmovswb %xmm16, (%rax) {k1} +vpmovswb %xmm16, %xmm19 {z}{k1} vpmovswb %ymm16, %xmm19 vpmovswb %ymm16, (%rax) vpmovswb %ymm16, %xmm19 {k1} vpmovswb %ymm16, (%rax) {k1} +vpmovswb %ymm16, %xmm19 {z}{k1} vpmovuswb %xmm16, %xmm19 vpmovuswb %xmm16, (%rax) vpmovuswb %xmm16, %xmm19 {k1} vpmovuswb %xmm16, (%rax) {k1} +vpmovuswb %xmm16, %xmm19 {z}{k1} vpmovuswb %ymm16, %xmm19 vpmovuswb %ymm16, (%rax) vpmovuswb %ymm16, %xmm19 {k1} vpmovuswb %ymm16, (%rax) {k1} +vpmovuswb %ymm16, %xmm19 {z}{k1} vpmovwb %xmm16, %xmm19 vpmovwb %xmm16, (%rax) vpmovwb %xmm16, %xmm19 {k1} vpmovwb %xmm16, (%rax) {k1} +vpmovwb %xmm16, %xmm19 {z}{k1} vpmovwb %ymm16, %xmm19 vpmovwb %ymm16, (%rax) vpmovwb %ymm16, %xmm19 {k1} vpmovwb %ymm16, (%rax) {k1} +vpmovwb %ymm16, %xmm19 {z}{k1} vpmovzxbw %xmm16, %xmm19 vpmovzxbw (%rax), %xmm19 @@ -1620,26 +1626,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %xmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovswb %ymm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovswb %ymm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovuswb %xmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 
2.00 vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovuswb %ymm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %xmm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovwb %xmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %ymm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovwb %ymm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm16, %xmm19 {%k1} @@ -2025,7 +2037,7 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 255.33 307.83 216.00 216.00 10.00 446.83 - 10.00 10.00 10.00 +# CHECK-NEXT: - - 255.33 307.83 216.00 216.00 10.00 458.83 - 10.00 10.00 10.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2557,26 +2569,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovswb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %ymm16, %xmm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovswb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %ymm16, 
%xmm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovuswb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovuswb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %xmm16, %xmm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovwb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %ymm16, %xmm19 # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovwb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpmovzxbw %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s 
b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s index a57ed444f794e..3a0efa584cad4 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s @@ -1187,6 +1187,126 @@ vpgatherdd (%rax,%xmm1,2), %xmm2 {k1} vpgatherqq (%rax,%xmm1,2), %xmm2 {k1} vpgatherqd (%rax,%xmm1,2), %xmm2 {k1} +vpmovdb %xmm19, %xmm16 +vpmovdb %xmm19, (%rax) +vpmovdb %xmm19, %xmm16 {k1} +vpmovdb %xmm19, (%rax) {k1} +vpmovdb %xmm19, %xmm16 {k1}{z} + +vpmovdb %ymm19, %xmm16 +vpmovdb %ymm19, (%rax) +vpmovdb %ymm19, %xmm16 {k1} +vpmovdb %ymm19, (%rax) {k1} +vpmovdb %ymm19, %xmm16 {k1}{z} + +vpmovdw %xmm19, %xmm16 +vpmovdw %xmm19, (%rax) +vpmovdw %xmm19, %xmm16 {k1} +vpmovdw %xmm19, (%rax) {k1} +vpmovdw %xmm19, %xmm16 {k1}{z} + +vpmovdw %ymm19, %xmm16 +vpmovdw %ymm19, (%rax) +vpmovdw %ymm19, %xmm16 {k1} +vpmovdw %ymm19, (%rax) {k1} +vpmovdw %ymm19, %xmm16 {k1}{z} + +vpmovqb %xmm19, %xmm16 +vpmovqb %xmm19, (%rax) +vpmovqb %xmm19, %xmm16 {k1} +vpmovqb %xmm19, (%rax) {k1} +vpmovqb %xmm19, %xmm16 {k1}{z} + +vpmovqb %ymm19, %xmm16 +vpmovqb %ymm19, (%rax) +vpmovqb %ymm19, %xmm16 {k1} +vpmovqb %ymm19, (%rax) {k1} +vpmovqb %ymm19, %xmm16 {k1}{z} + +vpmovqd %xmm19, %xmm16 +vpmovqd %xmm19, (%rax) +vpmovqd %xmm19, %xmm16 {k1} +vpmovqd %xmm19, (%rax) {k1} +vpmovqd %xmm19, %xmm16 {k1}{z} + +vpmovqd %ymm19, %xmm16 +vpmovqd %ymm19, (%rax) +vpmovqd %ymm19, %xmm16 {k1} +vpmovqd %ymm19, (%rax) {k1} +vpmovqd %ymm19, %xmm16 {k1}{z} + +vpmovqw %xmm19, %xmm16 +vpmovqw %xmm19, (%rax) +vpmovqw %xmm19, %xmm16 {k1} +vpmovqw %xmm19, (%rax) {k1} +vpmovqw %xmm19, %xmm16 {k1}{z} + +vpmovqw %ymm19, %xmm16 +vpmovqw %ymm19, (%rax) +vpmovqw %ymm19, %xmm16 {k1} +vpmovqw %ymm19, (%rax) {k1} +vpmovqw %ymm19, %xmm16 {k1}{z} + +vpmovsdb %xmm19, %xmm16 +vpmovsdb %xmm19, (%rax) +vpmovsdb %xmm19, %xmm16 {k1} +vpmovsdb %xmm19, (%rax) {k1} +vpmovsdb %xmm19, %xmm16 {k1}{z} + +vpmovsdb %ymm19, %xmm16 +vpmovsdb %ymm19, (%rax) +vpmovsdb %ymm19, 
%xmm16 {k1} +vpmovsdb %ymm19, (%rax) {k1} +vpmovsdb %ymm19, %xmm16 {k1}{z} + +vpmovsdw %xmm19, %xmm16 +vpmovsdw %xmm19, (%rax) +vpmovsdw %xmm19, %xmm16 {k1} +vpmovsdw %xmm19, (%rax) {k1} +vpmovsdw %xmm19, %xmm16 {k1}{z} + +vpmovsdw %ymm19, %xmm16 +vpmovsdw %ymm19, (%rax) +vpmovsdw %ymm19, %xmm16 {k1} +vpmovsdw %ymm19, (%rax) {k1} +vpmovsdw %ymm19, %xmm16 {k1}{z} + +vpmovsqb %xmm19, %xmm16 +vpmovsqb %xmm19, (%rax) +vpmovsqb %xmm19, %xmm16 {k1} +vpmovsqb %xmm19, (%rax) {k1} +vpmovsqb %xmm19, %xmm16 {k1}{z} + +vpmovsqb %ymm19, %xmm16 +vpmovsqb %ymm19, (%rax) +vpmovsqb %ymm19, %xmm16 {k1} +vpmovsqb %ymm19, (%rax) {k1} +vpmovsqb %ymm19, %xmm16 {k1}{z} + +vpmovsqd %xmm19, %xmm16 +vpmovsqd %xmm19, (%rax) +vpmovsqd %xmm19, %xmm16 {k1} +vpmovsqd %xmm19, (%rax) {k1} +vpmovsqd %xmm19, %xmm16 {k1}{z} + +vpmovsqd %ymm19, %xmm16 +vpmovsqd %ymm19, (%rax) +vpmovsqd %ymm19, %xmm16 {k1} +vpmovsqd %ymm19, (%rax) {k1} +vpmovsqd %ymm19, %xmm16 {k1}{z} + +vpmovsqw %xmm19, %xmm16 +vpmovsqw %xmm19, (%rax) +vpmovsqw %xmm19, %xmm16 {k1} +vpmovsqw %xmm19, (%rax) {k1} +vpmovsqw %xmm19, %xmm16 {k1}{z} + +vpmovsqw %ymm19, %xmm16 +vpmovsqw %ymm19, (%rax) +vpmovsqw %ymm19, %xmm16 {k1} +vpmovsqw %ymm19, (%rax) {k1} +vpmovsqw %ymm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %xmm19 vpmovsxbd (%rax), %xmm19 vpmovsxbd %xmm16, %xmm19 {k1} @@ -1257,6 +1377,66 @@ vpmovsxwq (%rax), %ymm19 {k1} vpmovsxwq %xmm16, %ymm19 {z}{k1} vpmovsxwq (%rax), %ymm19 {z}{k1} +vpmovusdb %xmm19, %xmm16 +vpmovusdb %xmm19, (%rax) +vpmovusdb %xmm19, %xmm16 {k1} +vpmovusdb %xmm19, (%rax) {k1} +vpmovusdb %xmm19, %xmm16 {k1}{z} + +vpmovusdb %ymm19, %xmm16 +vpmovusdb %ymm19, (%rax) +vpmovusdb %ymm19, %xmm16 {k1} +vpmovusdb %ymm19, (%rax) {k1} +vpmovusdb %ymm19, %xmm16 {k1}{z} + +vpmovusdw %xmm19, %xmm16 +vpmovusdw %xmm19, (%rax) +vpmovusdw %xmm19, %xmm16 {k1} +vpmovusdw %xmm19, (%rax) {k1} +vpmovusdw %xmm19, %xmm16 {k1}{z} + +vpmovusdw %ymm19, %xmm16 +vpmovusdw %ymm19, (%rax) +vpmovusdw %ymm19, %xmm16 {k1} +vpmovusdw %ymm19, (%rax) {k1} 
+vpmovusdw %ymm19, %xmm16 {k1}{z} + +vpmovusqb %xmm19, %xmm16 +vpmovusqb %xmm19, (%rax) +vpmovusqb %xmm19, %xmm16 {k1} +vpmovusqb %xmm19, (%rax) {k1} +vpmovusqb %xmm19, %xmm16 {k1}{z} + +vpmovusqb %ymm19, %xmm16 +vpmovusqb %ymm19, (%rax) +vpmovusqb %ymm19, %xmm16 {k1} +vpmovusqb %ymm19, (%rax) {k1} +vpmovusqb %ymm19, %xmm16 {k1}{z} + +vpmovusqd %xmm19, %xmm16 +vpmovusqd %xmm19, (%rax) +vpmovusqd %xmm19, %xmm16 {k1} +vpmovusqd %xmm19, (%rax) {k1} +vpmovusqd %xmm19, %xmm16 {k1}{z} + +vpmovusqd %ymm19, %xmm16 +vpmovusqd %ymm19, (%rax) +vpmovusqd %ymm19, %xmm16 {k1} +vpmovusqd %ymm19, (%rax) {k1} +vpmovusqd %ymm19, %xmm16 {k1}{z} + +vpmovusqw %xmm19, %xmm16 +vpmovusqw %xmm19, (%rax) +vpmovusqw %xmm19, %xmm16 {k1} +vpmovusqw %xmm19, (%rax) {k1} +vpmovusqw %xmm19, %xmm16 {k1}{z} + +vpmovusqw %ymm19, %xmm16 +vpmovusqw %ymm19, (%rax) +vpmovusqw %ymm19, %xmm16 {k1} +vpmovusqw %ymm19, (%rax) {k1} +vpmovusqw %ymm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %xmm19 vpmovzxbd (%rax), %xmm19 vpmovzxbd %xmm16, %xmm19 {k1} @@ -2784,6 +2964,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 5 19 2.00 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 5 17 1.00 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 5 17 1.00 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdw %xmm19, 
(%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: 3 4 1.00 * vpmovqd %xmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 3 4 1.00 * vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: 3 4 1.00 * vpmovqd %ymm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 3 4 1.00 * vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %xmm19, (%rax) +# 
CHECK-NEXT: 2 4 2.00 vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 
vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm16, %xmm19 {%k1} @@ -2844,6 +3124,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: 1 3 1.00 vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %ymm19, (%rax) {%k1} +# 
CHECK-NEXT: 2 4 2.00 vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm16, %xmm19 {%k1} @@ -3273,7 +3603,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] 
[10] [11] -# CHECK-NEXT: - 423.00 463.00 422.00 493.50 493.50 44.00 739.00 12.00 44.00 44.00 44.00 +# CHECK-NEXT: - 423.00 463.00 422.00 493.50 493.50 74.00 1023.00 12.00 74.00 74.00 74.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -4329,6 +4659,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - - - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - - - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - - - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %ymm19, %xmm16 {%k1} +# 
CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpmovqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpmovqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %xmm19, %xmm16 
{%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 
0.50 vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - 
- - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpmovsxbd %xmm16, %xmm19 {%k1} @@ -4389,6 +4819,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - 
vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - - - vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqw 
%xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - 0.50 2.00 - 0.50 0.50 0.50 vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpmovzxbd %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s index d6d157827b314..781676d70763c 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx @@ -1152,7 +1152,7 @@ vzeroupper # CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx @@ -1734,7 +1734,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# 
CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50 +# CHECK-NEXT: - 572.00 248.50 319.00 39.00 371.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1845,7 +1845,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx @@ -1860,7 +1860,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s index a2ec86e8724fa..9284810b9e73b 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s @@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2 -# 
CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00 +# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git 
a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s index e2cfd02bc76c8..ff0f22bec1402 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s index 678619ff4f5f6..08a706d4ab97a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s @@ -68,20 +68,20 @@ # ALL: Resource pressure per 
iteration: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 2.00 3.34 - +# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 3.34 2.00 - # ALL: Resource pressure by instruction: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 1.00 0.34 - addq $44, 64(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - - 0.33 - addq $44, 128(%r14) -# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 - 0.33 - addq $44, 192(%r14) -# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 256(%r14) -# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 - 0.33 - addq $44, 320(%r14) -# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 1.00 0.33 - addq $44, 384(%r14) -# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 - 0.34 - addq $44, 448(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - - 0.33 - addq $44, 512(%r14) -# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 - 0.33 - addq $44, 576(%r14) -# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 640(%r14) +# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 0.34 1.00 - addq $44, 64(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - 0.33 - - addq $44, 128(%r14) +# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 0.33 - - addq $44, 192(%r14) +# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 256(%r14) +# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 0.33 - - addq $44, 320(%r14) +# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 0.33 1.00 - addq $44, 384(%r14) +# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 0.34 - - addq $44, 448(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - 0.33 - - addq $44, 512(%r14) +# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 0.33 - - addq $44, 576(%r14) +# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 640(%r14) # ALL: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s index b1345cd11bb9c..3fab0dfba8b97 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s @@ -46,15 +46,15 @@ adox (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - - 1.33 - +# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxq (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxq (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s index b43f35e8347f7..8d2e4f6c78ce1 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s @@ -58,19 +58,19 @@ aeskeygenassist $22, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 
- - - - 2.00 - +# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdec %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdec (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdec (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdeclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdeclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdeclast (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenc %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenc (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenc (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenclast (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - aesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - aesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - aesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - aeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - aeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - aeskeygenassist $22, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s index 3eb2864c5376d..5d2e480a97a8e 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s @@ -1739,427 +1739,427 @@ vzeroupper # CHECK: Resource pressure per 
iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 310.90 275.73 107.00 107.00 20.00 277.73 8.90 18.50 18.50 19.00 0.73 107.00 - +# CHECK-NEXT: 310.90 275.73 107.00 107.00 20.00 277.73 8.90 18.50 18.50 19.00 107.00 0.73 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %xmm1, 
%xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vaesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vaesimc (%rax), %xmm2 +# 
CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vaesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - vaeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - vaeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - vaeskeygenassist $22, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 
0.33 - vandps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %ymm3, 
(%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %ymm3, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf128 (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf128 (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps 
%ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomiss (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %ecx # 
CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - vcvtsi2ss %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2ssl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: 
1.50 0.50 - - - 1.00 - - - - - - - vcvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), 
%ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - vdppd $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 
1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vextractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vextractps $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - 
- - 2.50 - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vinsertps $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %ymm2 -# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - vldmxcsr (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vinsertps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %ymm2 +# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - vldmxcsr (%rax) # CHECK-NEXT: - - - - 1.50 - - - - 0.50 - - - vmaskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %xmm0, 
%xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxsd 
(%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %xmm2 # 
CHECK-NEXT: - - - - - - - - - - - - - vmovapd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovd %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %ymm0, 
(%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovhlps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovlhps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %ymm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps 
%xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %ymm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %ymm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %ymm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovq %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - 
vmovsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 
1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm1, %ymm2 # 
CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 
- vpackuswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), 
%xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 
0.33 - vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - vpcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - vpcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - vpcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - vpcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2f128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrd $1, %xmm0, %ecx @@ -2169,268 +2169,268 @@ vzeroupper # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrw $1, %xmm0, (%rax) # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %xmm0, %xmm1, 
%xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrb $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrb $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrd $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrq $1, %rax, 
%xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrq $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrw $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %xmm0, %xmm1, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd 
%xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), 
%xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm0, %xmm1, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), 
%xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpslld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrad %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: 
0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm1, %xmm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %ymm1 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm0, %xmm1, %xmm2 
-# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps 
%xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundsd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 
1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm2 # CHECK-NEXT: 
1.00 - - - - - - - - - - - - vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - vstmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 
- 0.50 - - - - - 0.33 - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm1, %xmm2 
+# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - 0.73 - - vzeroall +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - - 0.73 - vzeroall # CHECK-NEXT: - - - - - - - - - - - - - vzeroupper diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s index a058279f0644d..7ee90759a6076 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s @@ -779,308 +779,308 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 - 65.33 - +# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 65.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti128 (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti128 (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm0, 
%ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti128 $1, (%rax), %ymm1, %ymm2 -# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm0 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsd %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 
- 0.33 - vpackuswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw 
(%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 
- 1.00 - - - - - 0.33 - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm0 
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vpcmpgtw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2i128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# 
CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 
1.50 - - - 1.00 - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - 
- - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd 
(%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %ymm0, %ecx # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd 
%xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vpmulhrsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 
0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpslld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vpsllq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - - 0.33 - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm0, %ymm1, 
%ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq 
%ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s index 5ad7397a8ddc3..3b0ec774b513c 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s @@ -636,6 +636,66 @@ vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} vpgatherqd (%rax,%zmm1,2), %ymm2 {k1} +vpmovdb %zmm19, %xmm16 +vpmovdb %zmm19, (%rax) +vpmovdb %zmm19, %xmm16 {k1} +vpmovdb %zmm19, (%rax) {k1} +vpmovdb %zmm19, %xmm16 {k1}{z} + +vpmovdw %zmm19, %ymm16 +vpmovdw %zmm19, (%rax) +vpmovdw %zmm19, %ymm16 {k1} +vpmovdw %zmm19, (%rax) {k1} +vpmovdw %zmm19, %ymm16 {k1}{z} + +vpmovqb %zmm19, %xmm16 +vpmovqb %zmm19, (%rax) +vpmovqb %zmm19, %xmm16 {k1} +vpmovqb %zmm19, (%rax) {k1} +vpmovqb %zmm19, %xmm16 {k1}{z} + +vpmovqd %zmm19, %ymm16 +vpmovqd %zmm19, (%rax) +vpmovqd %zmm19, %ymm16 {k1} +vpmovqd %zmm19, (%rax) {k1} +vpmovqd %zmm19, %ymm16 {k1}{z} + +vpmovqw %zmm19, %xmm16 
+vpmovqw %zmm19, (%rax) +vpmovqw %zmm19, %xmm16 {k1} +vpmovqw %zmm19, (%rax) {k1} +vpmovqw %zmm19, %xmm16 {k1}{z} + +vpmovsdb %zmm19, %xmm16 +vpmovsdb %zmm19, (%rax) +vpmovsdb %zmm19, %xmm16 {k1} +vpmovsdb %zmm19, (%rax) {k1} +vpmovsdb %zmm19, %xmm16 {k1}{z} + +vpmovsdw %zmm19, %ymm16 +vpmovsdw %zmm19, (%rax) +vpmovsdw %zmm19, %ymm16 {k1} +vpmovsdw %zmm19, (%rax) {k1} +vpmovsdw %zmm19, %ymm16 {k1}{z} + +vpmovsqb %zmm19, %xmm16 +vpmovsqb %zmm19, (%rax) +vpmovsqb %zmm19, %xmm16 {k1} +vpmovsqb %zmm19, (%rax) {k1} +vpmovsqb %zmm19, %xmm16 {k1}{z} + +vpmovsqd %zmm19, %ymm16 +vpmovsqd %zmm19, (%rax) +vpmovsqd %zmm19, %ymm16 {k1} +vpmovsqd %zmm19, (%rax) {k1} +vpmovsqd %zmm19, %ymm16 {k1}{z} + +vpmovsqw %zmm19, %xmm16 +vpmovsqw %zmm19, (%rax) +vpmovsqw %zmm19, %xmm16 {k1} +vpmovsqw %zmm19, (%rax) {k1} +vpmovsqw %zmm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %zmm19 vpmovsxbd (%rax), %zmm19 vpmovsxbd %xmm16, %zmm19 {k1} @@ -671,6 +731,36 @@ vpmovsxwq (%rax), %zmm19 {k1} vpmovsxwq %xmm16, %zmm19 {z}{k1} vpmovsxwq (%rax), %zmm19 {z}{k1} +vpmovusdb %zmm19, %xmm16 +vpmovusdb %zmm19, (%rax) +vpmovusdb %zmm19, %xmm16 {k1} +vpmovusdb %zmm19, (%rax) {k1} +vpmovusdb %zmm19, %xmm16 {k1}{z} + +vpmovusdw %zmm19, %ymm16 +vpmovusdw %zmm19, (%rax) +vpmovusdw %zmm19, %ymm16 {k1} +vpmovusdw %zmm19, (%rax) {k1} +vpmovusdw %zmm19, %ymm16 {k1}{z} + +vpmovusqb %zmm19, %xmm16 +vpmovusqb %zmm19, (%rax) +vpmovusqb %zmm19, %xmm16 {k1} +vpmovusqb %zmm19, (%rax) {k1} +vpmovusqb %zmm19, %xmm16 {k1}{z} + +vpmovusqd %zmm19, %ymm16 +vpmovusqd %zmm19, (%rax) +vpmovusqd %zmm19, %ymm16 {k1} +vpmovusqd %zmm19, (%rax) {k1} +vpmovusqd %zmm19, %ymm16 {k1}{z} + +vpmovusqw %zmm19, %xmm16 +vpmovusqw %zmm19, (%rax) +vpmovusqw %zmm19, %xmm16 {k1} +vpmovusqw %zmm19, (%rax) {k1} +vpmovusqw %zmm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %zmm19 vpmovzxbd (%rax), %zmm19 vpmovzxbd %xmm16, %zmm19 {k1} @@ -1646,6 +1736,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 19 30 5.33 * vpgatherdd (%rax,%zmm1,2), %zmm2 
{%k1} # CHECK-NEXT: 11 28 2.67 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 11 28 2.67 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovdb %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: 4 12 2.00 * vpmovdw %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovqb %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: 3 12 1.00 * vpmovqd %zmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 3 14 1.00 * vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovqw %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: 4 12 2.00 * vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovsdw %zmm19, %ymm16 {%k1} {z} +# 
CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: 4 12 2.00 * vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: 2 11 1.00 * vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovsxbd %xmm16, %zmm19 {%k1} @@ -1676,6 +1816,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 11 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 3 1.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: 4 12 2.00 * vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovusqb 
%zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: 4 12 2.00 * vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: 4 12 2.00 * vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: 2 6 2.00 vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 2.00 * vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: 2 11 1.00 * vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovzxbd %xmm16, %zmm19 {%k1} @@ -2058,7 +2223,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 508.60 13.60 218.67 218.67 48.50 578.60 1.60 48.50 48.50 48.50 1.60 218.67 - +# CHECK-NEXT: 508.60 13.60 218.67 218.67 63.50 723.60 1.60 63.50 63.50 63.50 218.67 1.60 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -2072,943 +2237,1018 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - kshiftrw $2, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - kunpckbw %k0, %k1, %k2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd 
(%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 
0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - 
valignq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x4 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf64x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x4 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x4 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti64x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 
- - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf64x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti64x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x4 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastsd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastsd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastsd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastsd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %zmm19 -# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastss (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastss (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastss (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastss (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %zmm0, %zmm1, %k2 
{%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqsd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqsd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqss %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqss %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomiss %xmm16, %xmm17 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomiss (%rax), %xmm17 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomiss (%rax), %xmm17 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtdq2pd %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - 
vcvtdq2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtdq2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtdq2ps %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %zmm19 {%k1} {z} +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2dq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2udq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 
0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2dq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq 
(%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2udq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2ps %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps 
(%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2dq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2dq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2dq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2dq %zmm16, %zmm19 -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2dq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - 
- - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2usi (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtss2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvtss2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2usi (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2udq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2udq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 
1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2udq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2udq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} # 
CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2usi (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttss2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvttss2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2usi (%rax), %rcx # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtudq2pd %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd 
(%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtudq2ps %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtusi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtusi2sd %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtusi2sdl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtusi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtusi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtusi2sdq (%rax), %xmm0, 
%xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtusi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - vcvtusi2ss %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtusi2ssl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtusi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtusi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtusi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 
0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - {evex} vextractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - {evex} vextractps $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 
1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 
- - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - 
vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to16}, 
%zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - - 5.33 - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherqpd (%rax,%zmm1,2), 
%zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - 5.33 - - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to8}, 
%zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - 
vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps 
(%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovapd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovapd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovapd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovapd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovaps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovaps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovaps (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovaps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovaps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %zmm19 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovddup (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovddup (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa32 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa32 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa64 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa64 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 
%zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu32 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu32 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu64 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu64 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu64 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 
- - - - - - - 0.33 - vmovntdqa (%rax), %zmm0 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovshdup %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovshdup %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovshdup (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovshdup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovshdup %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovshdup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovshdup (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovsldup %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovsldup %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovsldup (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovsldup %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovupd 
%zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovupd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovupd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovupd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovups %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovups %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovups (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovups %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovups (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 
1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 
- - - - - - 0.33 - - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 
1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - 
- - - 0.33 - vpaddq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastd (%rax), %zmm19 
{%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd 
(%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 
- 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %zmm1, %k2 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - - 5.33 - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vpgatherdq 
(%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - 5.33 - - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vpmovqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovqw %zmm19, (%rax) +# CHECK-NEXT: - 
- - - - 2.00 - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 
- - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 
- 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 
0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %zmm19 
{%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd 
(%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2.00 - - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 2.00 - - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} 
+# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 2.00 - - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to8}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to8}, 
%zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to16}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to16}, 
%zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to8}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 
- - - - - 0.33 - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd 
(%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to8}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 
- vpermq $0, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 0.20 - - vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterdq %zmm1, 
(%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 - 0.20 - vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - 0.33 - - vpshufd $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 
0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhdq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhdq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhdq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax), %zmm17, %zmm19 
{%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhqdq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhqdq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhqdq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckldq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckldq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckldq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq 
(%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 0.20 - - vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 - 0.20 - vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtpd %zmm16, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax), %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax), %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax){1to8}, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtpd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - 
- - - 0.33 - - vsqrtpd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtpd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtps %zmm16, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax), %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax), %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax){1to16}, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd 
(%rax), %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd 
(%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 
0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd 
(%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubpd %zmm16, %zmm17, %zmm19 
{%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 
0.33 - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomiss %xmm16, %xmm17 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomiss (%rax), %xmm17 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomiss (%rax), %xmm17 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), 
%zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - 
vunpcklpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} 
{z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s index d4ae44a3ca80e..6fe1e5bb144ce 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s @@ -63,23 +63,23 @@ vpshufbitqmb (%rdi), %zmm17, %k2 {%k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 - 2.67 2.67 - 16.00 - - - - - 2.67 - +# CHECK-NEXT: 4.00 - 2.67 2.67 - 16.00 - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %zmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: 
- - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %zmm0 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %zmm16, %zmm17, %k2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %zmm17, %k2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %zmm17, %k2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %zmm16, %zmm17, %k2 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %zmm17, %k2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %zmm17, %k2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s index 7c2d8fa99835d..e0fa580d09187 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s @@ -98,39 +98,39 @@ vpshufbitqmb (%rdi), %ymm17, %k2 {%k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 - 5.33 5.33 - 32.00 - - - - - 5.33 - +# CHECK-NEXT: 8.00 - 5.33 5.33 - 32.00 - - - - 5.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %ymm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %ymm0 
{%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %ymm0 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %xmm16, %xmm17, %k2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %xmm17, %k2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %xmm17, %k2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %xmm16, %xmm17, %k2 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %xmm17, %k2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %xmm17, %k2 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %ymm16, %ymm17, %k2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %ymm17, %k2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %ymm17, %k2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %ymm16, %ymm17, %k2 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %ymm17, %k2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %ymm17, %k2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s index 989d72185f8fa..f865b679db3f2 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s @@ -373,16 +373,19 @@ vpmovswb %zmm16, %ymm19 vpmovswb %zmm16, (%rax) vpmovswb %zmm16, %ymm19 {k1} vpmovswb %zmm16, (%rax) {k1} +vpmovswb %zmm16, %ymm19 {z}{k1} vpmovuswb %zmm16, %ymm19 vpmovuswb %zmm16, (%rax) vpmovuswb %zmm16, %ymm19 {k1} vpmovuswb %zmm16, (%rax) {k1} +vpmovuswb %zmm16, %ymm19 {z}{k1} vpmovwb %zmm16, %ymm19 vpmovwb %zmm16, (%rax) 
vpmovwb %zmm16, %ymm19 {k1} vpmovwb %zmm16, (%rax) {k1} +vpmovwb %zmm16, %ymm19 {z}{k1} vpmovzxbw %ymm16, %zmm19 vpmovzxbw (%rax), %zmm19 @@ -919,14 +922,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 4 12 2.00 * vpmovswb %zmm16, (%rax) # CHECK-NEXT: 2 6 2.00 vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 15 2.00 * vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: 4 12 2.00 * vpmovuswb %zmm16, (%rax) # CHECK-NEXT: 2 6 2.00 vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 15 2.00 * vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %zmm16, %ymm19 # CHECK-NEXT: 4 12 2.00 * vpmovwb %zmm16, (%rax) # CHECK-NEXT: 2 6 2.00 vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 15 2.00 * vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 6 2.00 vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: 2 11 1.00 * vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: 1 5 1.00 vpmovzxbw %ymm16, %zmm19 {%k1} @@ -1129,7 +1135,7 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 269.20 4.20 73.33 73.33 7.50 272.20 0.20 7.50 7.50 7.50 0.20 73.33 - +# CHECK-NEXT: 269.20 4.20 73.33 73.33 7.50 278.20 0.20 7.50 7.50 7.50 73.33 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -1140,12 +1146,12 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandnd %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandnq %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovd %k0, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - kmovd (%rax), %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - kmovd (%rax), %k2 # CHECK-NEXT: - - - - 0.50 - - 0.50 
0.50 0.50 - - - kmovd %k0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - kmovd %eax, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovd %k0, %eax # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovq %k0, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - kmovq (%rax), %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - kmovq (%rax), %k2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - kmovq %k0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - kmovq %rax, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovq %k0, %rax @@ -1168,468 +1174,471 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxord %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxorq %k0, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu8 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu8 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu8 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu8 (%rax), 
%zmm19 {%k1} -# CHECK-NEXT: 0.20 0.20 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.20 0.20 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vmovdqu8 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu16 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu16 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu16 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu16 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsb %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsb %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsb %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 
- - - - - - - 0.33 - vpabsb (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsw %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsw %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsw %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 
0.33 - vpacksswb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z} 
# CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 
0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), 
%zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 
- 0.33 - vpblendmw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - 
- - - - vpbroadcastw %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw 
(%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrb $0, %xmm16, %eax # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrb $0, %xmm16, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrw $0, %xmm16, %eax # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrw $0, %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrb $0, %eax, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrb $0, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrb $0, (%rax), %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrw $0, %eax, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrw $0, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrw $0, (%rax), %xmm16, %xmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpermw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpermw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpermw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 
0.33 - vpermw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2w %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2w (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2w (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2w %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2w (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2w (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2w %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2w (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2w (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2w %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2w (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2w (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2w %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2w (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2w (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2w %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2w (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2w (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddubsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), 
%zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddubsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddubsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddwd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddwd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddwd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsb 
(%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxub %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxub %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxub %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxuw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxuw %zmm16, %zmm17, %zmm19 {%k1} -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxuw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %zmm17, %zmm19 {%k1} {z} # 
CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminub %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminub %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminub %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminuw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminuw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminuw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovb2m %zmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovw2m %zmm0, %k0 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2b %k0, %zmm0 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2w %k0, %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 
- - vpmovsxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovswb %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovswb %zmm16, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovuswb %zmm16, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovwb %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovwb %zmm16, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vpmovzxbw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhuw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhuw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhuw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - 
- - - - - vpmulhw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmullw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmullw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmullw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufb (%rax), %zmm17, %zmm19 
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufhw $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufhw $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufhw $0, (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufhw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufhw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufhw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufhw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufhw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufhw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshuflw $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshuflw $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshuflw $0, (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshuflw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshuflw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshuflw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshuflw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshuflw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshuflw 
$0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpslldq $1, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpslldq $1, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpslldq $1, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllw $0, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsllw %xmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %zmm17, 
%zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsllw %xmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsllw %xmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsravw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsravw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsravw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsraw $0, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsraw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsraw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 
0.33 - - vpsraw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsraw %xmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsraw %xmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsraw %xmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsrldq $1, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsrldq $1, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsrldq $1, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlw $0, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsrlw %xmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsrlw %xmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsrlw %xmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubb (%rax), 
%zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusb %zmm16, %zmm17, %zmm19 {%k1} -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubw 
(%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 
1.00 - - - - - - - vpunpckhbw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhbw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhwd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhwd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhwd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhwd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhwd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklbw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklbw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklbw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklbw (%rax), %zmm17, %zmm19 
{%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklbw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklbw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklbw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklbw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklwd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklwd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s index 963363964ad91..33e3d745ad391 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s @@ -621,31 +621,37 @@ vpmovswb %xmm16, %xmm19 vpmovswb %xmm16, (%rax) vpmovswb %xmm16, %xmm19 {k1} vpmovswb %xmm16, (%rax) {k1} +vpmovswb %xmm16, %xmm19 {z}{k1} vpmovswb %ymm16, %xmm19 vpmovswb %ymm16, (%rax) vpmovswb %ymm16, %xmm19 {k1} vpmovswb %ymm16, (%rax) {k1} +vpmovswb %ymm16, %xmm19 {z}{k1} vpmovuswb %xmm16, %xmm19 vpmovuswb %xmm16, (%rax) vpmovuswb %xmm16, %xmm19 {k1} vpmovuswb %xmm16, (%rax) {k1} +vpmovuswb %xmm16, %xmm19 {z}{k1} vpmovuswb %ymm16, %xmm19 vpmovuswb 
%ymm16, (%rax) vpmovuswb %ymm16, %xmm19 {k1} vpmovuswb %ymm16, (%rax) {k1} +vpmovuswb %ymm16, %xmm19 {z}{k1} vpmovwb %xmm16, %xmm19 vpmovwb %xmm16, (%rax) vpmovwb %xmm16, %xmm19 {k1} vpmovwb %xmm16, (%rax) {k1} +vpmovwb %xmm16, %xmm19 {z}{k1} vpmovwb %ymm16, %xmm19 vpmovwb %ymm16, (%rax) vpmovwb %ymm16, %xmm19 {k1} vpmovwb %ymm16, (%rax) {k1} +vpmovwb %ymm16, %xmm19 {z}{k1} vpmovzxbw %xmm16, %xmm19 vpmovzxbw (%rax), %xmm19 @@ -1620,26 +1626,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 4 12 1.00 * vpmovswb %xmm16, (%rax) # CHECK-NEXT: 2 4 1.00 vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 13 1.00 * vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 1.00 vpmovswb %ymm16, %xmm19 # CHECK-NEXT: 4 12 1.00 * vpmovswb %ymm16, (%rax) # CHECK-NEXT: 2 6 1.00 vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 15 1.00 * vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: 4 12 1.00 * vpmovuswb %xmm16, (%rax) # CHECK-NEXT: 2 4 1.00 vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 13 1.00 * vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 1.00 vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: 4 12 1.00 * vpmovuswb %ymm16, (%rax) # CHECK-NEXT: 2 6 1.00 vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 15 1.00 * vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vpmovwb %xmm16, %xmm19 # CHECK-NEXT: 4 12 1.00 * vpmovwb %xmm16, (%rax) # CHECK-NEXT: 2 4 1.00 vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 13 1.00 * vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 1.00 vpmovwb %ymm16, %xmm19 # CHECK-NEXT: 4 12 1.00 * vpmovwb %ymm16, (%rax) # CHECK-NEXT: 2 6 1.00 vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 15 1.00 * vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 
vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: 1 3 0.50 vpmovzxbw %xmm16, %xmm19 {%k1} @@ -2026,514 +2038,514 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 258.67 323.17 144.00 144.00 10.00 420.17 - 10.00 10.00 10.00 - 144.00 - +# CHECK-NEXT: 258.67 326.17 144.00 144.00 10.00 429.17 - 10.00 10.00 10.00 144.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw 
$0, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu8 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu8 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu8 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu8 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 
(%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu16 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu16 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu16 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu16 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm16, %xmm19 
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm19 {%k1} 
{z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw 
(%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 
- - - - 1.00 - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm17, %xmm19 {%k1} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 
0.33 - - - 0.33 - - - - - - - vpaddb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb 
(%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm17, %ymm19 
{%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 
0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - 
- - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), 
%ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - 
- 0.33 - - - - - - - vpblendmb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - 
vpblendmw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 
- - - - 1.00 - - - - - - - vpbroadcastb %eax, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %xmm19 {%k1} 
{z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, 
%xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vpcmpgtb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - 
- 0.33 - - vpcmpequb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %ymm17, %ymm19 {%k1} +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - 
- 0.33 - - vpermi2w (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm17, %xmm19 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm16, %xmm17, %xmm19 
{%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw 
%ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm17, %xmm19 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - 
- vpminsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm17, 
%ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - 
- - - - - - vpminub %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovb2m %xmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovw2m %xmm0, %k0 # CHECK-NEXT: 1.00 - - - 
- - - - - - - - - vpmovb2m %ymm0, %k0 @@ -2543,406 +2555,412 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2b %k0, %ymm0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2w %k0, %ymm0 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovswb %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - 
vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %ymm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovswb %ymm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovuswb %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovuswb %ymm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovwb %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %ymm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovwb %ymm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovwb 
%ymm16, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm17, %xmm19 
{%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm17, 
%xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 
- - - - - - 0.33 - vpmulhw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw 
(%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), 
%ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %xmm16, %xmm19 {%k1} -# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpslldq $1, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpslldq $1, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpslldq $1, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpslldq $1, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vpsllvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- - 0.33 - vpsllw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), 
%ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - 0.33 - - vpsraw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpsrldq $1, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpsrldq $1, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpsrldq $1, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpsrldq $1, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 
0.33 - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 
- - - - - - - - - - - vpsrlw $0, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - 
- - - - 0.33 - - vpsrlw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - 
- - - - - - - - - vpsubsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm17, %xmm19 
{%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm17, %xmm19 {%k1} {z} # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - - 0.33 - vpsubusw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 
0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %ymm1, %k2 {%k3} # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - 
vpunpckhbw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - 
- - - 0.33 - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} {z} +# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), 
%ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s index 61e9c8b9b38aa..f4dd6b2fa63b7 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s @@ -110,45 +110,45 @@ vplzcntq (%rax){1to8}, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 243.50 - 8.00 8.00 - 297.50 - - - - - 8.00 - +# CHECK-NEXT: 243.50 - 8.00 8.00 - 297.50 - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmb2q %k0, %zmm16 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmw2d %k0, %zmm16 # CHECK-NEXT: 15.50 - - - - 21.50 - - - - - - - vpconflictd %zmm16, %zmm19 -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax), %zmm19 -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax){1to16}, %zmm19 +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax), %zmm19 +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax){1to16}, %zmm19 # CHECK-NEXT: 15.50 - - - - 21.50 - - - - - - - vpconflictd %zmm16, %zmm19 {%k1} -# 
CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 15.50 - - - - 21.50 - - - - - - - vpconflictd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 10.00 - - - - 12.00 - - - - - - - vpconflictq %zmm16, %zmm19 -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax), %zmm19 -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax), %zmm19 +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax){1to8}, %zmm19 # CHECK-NEXT: 10.00 - - - - 12.00 - - - - - - - vpconflictq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 10.00 - - - - 12.00 - - - - - - - vpconflictq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax){1to8}, %zmm19 
{%k1} {z} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %zmm19 {%k1} -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to8}, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s index d35c8657cf9fa..8dac8e96ed676 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s @@ -190,7 +190,7 @@ vplzcntq (%rax){1to4}, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 148.00 139.50 16.00 16.00 - 238.00 4.50 - - - - 16.00 - +# CHECK-NEXT: 148.00 139.50 16.00 16.00 - 238.00 4.50 - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -199,74 +199,74 @@ vplzcntq (%rax){1to4}, %ymm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmw2d %k0, %xmm16 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmw2d %k0, %ymm16 # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictd %xmm16, %xmm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax), %xmm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax){1to4}, %xmm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - 
vpconflictd (%rax), %xmm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax){1to4}, %xmm19 # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 6.00 5.50 - - - 11.00 0.50 - - - - - - vpconflictd %ymm16, %ymm19 -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax), %ymm19 -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax){1to8}, %ymm19 +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax), %ymm19 +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax){1to8}, %ymm19 # CHECK-NEXT: 6.00 5.50 - - - 11.00 0.50 - - - - - - vpconflictd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax){1to8}, %ymm19 {%k1} # 
CHECK-NEXT: 6.00 5.50 - - - 11.00 0.50 - - - - - - vpconflictd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpconflictq %xmm16, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax), %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax), %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpconflictq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpconflictq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictq %ymm16, %ymm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - 
- - 0.33 - vpconflictq (%rax), %ymm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax), %ymm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax){1to4}, %ymm19 # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %xmm19 {%k1} +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %xmm19 -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vplzcntq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to4}, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s index cf451f6feff85..504eda42d3010 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s @@ -873,7 +873,7 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 256.00 22.00 68.00 68.00 5.50 165.00 - 5.50 5.50 5.50 - 68.00 - +# CHECK-NEXT: 256.00 22.00 68.00 68.00 5.50 165.00 - 5.50 5.50 5.50 68.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -882,7 +882,7 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandb %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandnb %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovb %k0, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - kmovb (%rax), %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - kmovb (%rax), %k2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - kmovb %k0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - kmovb %eax, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovb %k0, %eax @@ -896,173 +896,173 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxnorb %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxorb %k0, %k1, %k2 # CHECK-NEXT: 0.50 - - - - 
0.50 - - - - - - - vandnpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 
- 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - 
vandpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x2 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x2 (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - 
vbroadcastf32x2 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x2 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x8 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x8 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf64x2 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x2 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x8 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf64x2 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x2 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x2 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x2 (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x2 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 
0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x2 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x8 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x8 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti64x2 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x2 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x8 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti64x2 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x2 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2qq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2qq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %zmm19 {%k1} +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2qq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2uqq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2uqq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2uqq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2qq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 
- 0.33 - vcvtps2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2qq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2qq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2uqq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2uqq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - 
- vcvtps2uqq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtqq2pd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtqq2pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtqq2pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtqq2ps %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax), %ymm19 +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtqq2ps %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtqq2ps %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2qq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2qq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2qq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2uqq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2uqq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2uqq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2qq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - 
vcvttps2qq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2qq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2uqq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2uqq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2uqq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtuqq2pd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtuqq2pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtuqq2pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtuqq2ps %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtuqq2ps %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 
0.33 - vcvtuqq2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtuqq2ps %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf32x8 $1, %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf32x8 $1, %zmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf32x8 $1, %zmm16, %ymm19 {%k1} @@ -1084,184 +1084,184 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti64x2 $1, %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti64x2 $1, %zmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %zmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdz $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdz $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to8}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %zmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdz $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - 
vfpclasspdz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %zmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsz $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to16}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsz $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to16}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %zmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsz $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to16}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to16}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasssd $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasssd $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasssd $171, (%rax), %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasssd $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasssd $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasssd $171, (%rax), %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassss $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassss $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassss $171, (%rax), %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassss $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassss $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassss $171, (%rax), %k1 {%k2} # CHECK-NEXT: - - - - - 
1.00 - - - - - - - vinsertf32x8 $1, %ymm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti32x8 $1, %ymm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti32x8 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti32x8 $1, (%rax), %zmm16, %zmm19 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti64x2 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti64x2 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 
0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} 
{z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrd $1, %xmm16, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrd $1, %xmm16, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrq $1, %xmm16, %rcx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrq $1, %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrd $1, %ecx, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrd $1, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrd $1, (%rax), %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrq $1, %rcx, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrq $1, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrq $1, (%rax), %xmm16, %xmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2d %k0, %zmm0 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2q %k0, %zmm0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovd2m %zmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovq2m %zmm0, %k0 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpmullq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %zmm17, %zmm19 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpmullq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpmullq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 3.00 
- 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, {sae}, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - 
- - 0.33 - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, {sae}, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangesd $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangesd $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - 0.33 - - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangess $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangess $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - 
vreducepd $ab, (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, {sae}, %zmm16, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, {sae}, %zmm16, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, {sae}, %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 
- - - - - - 0.33 - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, {sae}, %zmm16, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, {sae}, %zmm16, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, {sae}, %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducesd $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducesd $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, {sae}, 
%xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducess $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducess $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 
- - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s index 2b87f5ecc3d0f..1cc0d9a63f237 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s @@ -1139,322 +1139,322 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 220.67 216.67 110.33 110.33 2.00 159.67 - 2.00 2.00 2.00 - 110.33 - +# CHECK-NEXT: 220.67 216.67 110.33 110.33 2.00 159.67 - 2.00 2.00 2.00 110.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to4}, %xmm17, %xmm19 
+# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm17, %ymm19 
{%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd 
(%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm16, %xmm17, %xmm19 {%k1} -# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm17, %ymm19 {%k1} {z} 
-# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x2 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x2 (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x2 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x2 (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf64x2 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf64x2 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf64x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf64x2 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf64x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf64x2 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x2 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x2 (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 
0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x2 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x2 (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti64x2 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti64x2 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti64x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti64x2 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti64x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti64x2 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 
0.33 - vcvtpd2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq 
%ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - 0.33 - - vcvtpd2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %xmm19 +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq 
(%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd 
(%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - 
- vcvttpd2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to4}, 
%ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - - 0.33 - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to2}, %xmm19 # 
CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to4}, %ymm19 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf64x2 $1, %ymm16, %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf64x2 $1, %ymm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf64x2 $1, %ymm16, %xmm19 {%k1} @@ -1466,41 +1466,41 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - 
- - vextracti64x2 $1, %ymm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti64x2 $1, %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdx $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to2}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdx $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to2}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdx $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to2}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to2}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %ymm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdy $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdy $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to4}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %ymm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdy $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to4}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsx $171, (%rax), %k1 
-# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsx $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to4}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsx $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to4}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %ymm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsy $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsy $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to8}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %ymm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsy $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %ymm16, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - 
vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %ymm16, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti64x2 $1, (%rax), %ymm16, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti64x2 $1, (%rax), %ymm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2d %k0, %xmm0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2q %k0, %xmm0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2d %k0, %ymm0 @@ -1510,158 +1510,158 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovd2m %ymm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovq2m %ymm0, %k0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd 
(%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm17, %ymm19 {%k1} +# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # 
CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %xmm16, %xmm17, 
%xmm19 {%k1} {z} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %ymm17, %ymm19 # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 
- - - - - - - - - - - vrangepd $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - 
- - - - - - - - - - vrangeps $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %ymm16, %ymm17, 
%ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), 
%xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 
- - vreduceps $ab, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - 
- - - - - vreduceps $ab, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 
0.33 - vxorpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to4}, 
%xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps 
(%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s index a4b5f1733e176..b10283b08984a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s @@ -79,31 +79,31 @@ vgf2p8mulb (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 24.00 - 5.00 5.00 - - - - - - - 5.00 - +# CHECK-NEXT: 24.00 - 5.00 5.00 - - - - - - 5.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 
-# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s index db03371ee0ff6..a1388eeac6d8a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s @@ -130,55 +130,55 @@ vgf2p8mulb (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 24.00 24.00 10.00 10.00 - - - - - - - 10.00 - +# CHECK-NEXT: 24.00 24.00 10.00 10.00 - - - - - - 10.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, 
(%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} 
{z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), 
%xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s index 440a81ffa016f..a45e853eadb37 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s @@ -66,25 +66,25 @@ vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 18.00 - 4.00 4.00 - - - - - - - 4.00 - +# CHECK-NEXT: 
18.00 - 4.00 4.00 - - - - - - 4.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to8}, 
%zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s index f21492bfb979e..85d2d0fa0a36e 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s @@ -104,43 +104,43 @@ vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 18.00 18.00 8.00 8.00 - - - - - - - 8.00 - +# CHECK-NEXT: 18.00 18.00 8.00 8.00 - - - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq 
(%rdi){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 
{%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s index 41a22ff1c3e2c..e894f2baa25e0 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s @@ -48,15 +48,15 @@ vaesenclast (%rax), %zmm17, %zmm19 # CHECK: Resource pressure per 
iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 - 1.33 1.33 - - - - - - - 1.33 - +# CHECK-NEXT: 8.00 - 1.33 1.33 - - - - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesdec %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesdeclast %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesenc %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesenclast %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s index 78f692ba61669..7bea5b88dc3ad 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s @@ -68,23 +68,23 @@ vaesenclast (%rax), %ymm17, %ymm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - - 2.67 - +# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] 
[4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast 
(%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %ymm17, %ymm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s index 83f4d3f403224..180c9e938b16d 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s @@ -86,34 +86,34 @@ vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 6.00 - 5.00 5.00 - 45.00 - - - - - 5.00 - +# CHECK-NEXT: 6.00 - 5.00 5.00 - 45.00 - - - - 5.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2b %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2b (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2b (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2b %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 
0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2b (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2b (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2b %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2b (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2b (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2b %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2b (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2b (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2b %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2b (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2b (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2b %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2b (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2b (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 
{%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s index e4b023feacf9f..ed8a4170d0938 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s @@ -276,14 +276,14 @@ vpshrdw $1, (%rax), %zmm17, %zmm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 117.40 0.40 21.33 21.33 3.00 56.40 1.40 3.00 3.00 3.00 0.40 21.33 - +# CHECK-NEXT: 117.40 0.40 21.33 21.33 3.00 56.40 1.40 3.00 3.00 3.00 21.33 0.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressb %zmm16, %zmm19 -# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 0.20 - - vpcompressb %zmm16, (%rax) +# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 - 0.20 - vpcompressb %zmm16, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressb %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 0.20 - - vpcompressb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 
0.20 1.00 1.00 1.00 - 0.20 - vpcompressb %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressb %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressw %zmm16, %zmm19 # CHECK-NEXT: 1.50 - - - 0.50 2.00 0.50 0.50 0.50 0.50 - - - vpcompressw %zmm16, (%rax) @@ -291,108 +291,108 @@ vpshrdw $1, (%rax), %zmm17, %zmm19 {k1}{z} # CHECK-NEXT: 1.50 - - - 0.50 2.00 0.50 0.50 0.50 0.50 - - - vpcompressw %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressw %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %zmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %zmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldd $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldd $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldq $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldq $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} 
# CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldq $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 
- vpshldvq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), 
%zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldw $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldw $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldw $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldw $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldw $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdd $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdd $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 
0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdq $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdq $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdq $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 
1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 
1.00 - - - - - - - - - - - - vpshrdvq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdw $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdw $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdw $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdw $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdw $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 
0.33 - 0.50 - - - - 0.33 - - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s index 6aed2f069b7d4..3db09bc332d8f 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s @@ -524,7 +524,7 @@ vpshrdw $1, (%rax), %ymm17, %ymm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 129.33 117.33 42.67 42.67 4.00 101.33 4.00 4.00 4.00 4.00 - 42.67 - +# CHECK-NEXT: 129.33 117.33 42.67 42.67 4.00 101.33 4.00 4.00 4.00 4.00 42.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -549,214 +549,214 @@ vpshrdw $1, (%rax), %ymm17, %ymm19 {k1}{z} # CHECK-NEXT: 1.50 - - - 0.50 2.00 0.50 0.50 0.50 0.50 - - - vpcompressw %ymm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressw %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %xmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %ymm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 
0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %xmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %ymm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldd $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 
- - - - - 0.33 - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldd $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 
- 0.33 - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldq $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldq $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, 
(%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - 
- - - - 0.33 - - vpshldvd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
0.33 - - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %ymm16, %ymm17, %ymm19 {%k1} -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw 
%ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldw $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldw $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldw $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldw $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldw $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldw $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %ymm16, %ymm17, %ymm19 {%k1} 
{z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdd $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdd $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 
- - - - - 0.33 - - vpshrdd $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdq $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, 
(%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdq $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, 
(%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - - 0.33 - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vpshrdvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdw $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdw $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdw $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - 
vpshrdw $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdw $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdw $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdw $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s index 5be411d906dc6..96c1a4e93c276 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s @@ -144,61 +144,61 @@ vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 10.00 10.00 - 86.00 - - - - - 10.00 - +# CHECK-NEXT: 8.00 8.00 10.00 10.00 - 86.00 - - - - 10.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 
- - - - - 0.33 - vpermb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 
- - - - - - - vpermi2b %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %ymm16, 
%ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to2}, %xmm17, 
%xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s index c1d6d5776e587..ebcb70296e50a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s +++ 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s @@ -1187,6 +1187,126 @@ vpgatherdd (%rax,%xmm1,2), %xmm2 {k1} vpgatherqq (%rax,%xmm1,2), %xmm2 {k1} vpgatherqd (%rax,%xmm1,2), %xmm2 {k1} +vpmovdb %xmm19, %xmm16 +vpmovdb %xmm19, (%rax) +vpmovdb %xmm19, %xmm16 {k1} +vpmovdb %xmm19, (%rax) {k1} +vpmovdb %xmm19, %xmm16 {k1}{z} + +vpmovdb %ymm19, %xmm16 +vpmovdb %ymm19, (%rax) +vpmovdb %ymm19, %xmm16 {k1} +vpmovdb %ymm19, (%rax) {k1} +vpmovdb %ymm19, %xmm16 {k1}{z} + +vpmovdw %xmm19, %xmm16 +vpmovdw %xmm19, (%rax) +vpmovdw %xmm19, %xmm16 {k1} +vpmovdw %xmm19, (%rax) {k1} +vpmovdw %xmm19, %xmm16 {k1}{z} + +vpmovdw %ymm19, %xmm16 +vpmovdw %ymm19, (%rax) +vpmovdw %ymm19, %xmm16 {k1} +vpmovdw %ymm19, (%rax) {k1} +vpmovdw %ymm19, %xmm16 {k1}{z} + +vpmovqb %xmm19, %xmm16 +vpmovqb %xmm19, (%rax) +vpmovqb %xmm19, %xmm16 {k1} +vpmovqb %xmm19, (%rax) {k1} +vpmovqb %xmm19, %xmm16 {k1}{z} + +vpmovqb %ymm19, %xmm16 +vpmovqb %ymm19, (%rax) +vpmovqb %ymm19, %xmm16 {k1} +vpmovqb %ymm19, (%rax) {k1} +vpmovqb %ymm19, %xmm16 {k1}{z} + +vpmovqd %xmm19, %xmm16 +vpmovqd %xmm19, (%rax) +vpmovqd %xmm19, %xmm16 {k1} +vpmovqd %xmm19, (%rax) {k1} +vpmovqd %xmm19, %xmm16 {k1}{z} + +vpmovqd %ymm19, %xmm16 +vpmovqd %ymm19, (%rax) +vpmovqd %ymm19, %xmm16 {k1} +vpmovqd %ymm19, (%rax) {k1} +vpmovqd %ymm19, %xmm16 {k1}{z} + +vpmovqw %xmm19, %xmm16 +vpmovqw %xmm19, (%rax) +vpmovqw %xmm19, %xmm16 {k1} +vpmovqw %xmm19, (%rax) {k1} +vpmovqw %xmm19, %xmm16 {k1}{z} + +vpmovqw %ymm19, %xmm16 +vpmovqw %ymm19, (%rax) +vpmovqw %ymm19, %xmm16 {k1} +vpmovqw %ymm19, (%rax) {k1} +vpmovqw %ymm19, %xmm16 {k1}{z} + +vpmovsdb %xmm19, %xmm16 +vpmovsdb %xmm19, (%rax) +vpmovsdb %xmm19, %xmm16 {k1} +vpmovsdb %xmm19, (%rax) {k1} +vpmovsdb %xmm19, %xmm16 {k1}{z} + +vpmovsdb %ymm19, %xmm16 +vpmovsdb %ymm19, (%rax) +vpmovsdb %ymm19, %xmm16 {k1} +vpmovsdb %ymm19, (%rax) {k1} +vpmovsdb %ymm19, %xmm16 {k1}{z} + +vpmovsdw %xmm19, %xmm16 +vpmovsdw %xmm19, (%rax) +vpmovsdw %xmm19, %xmm16 {k1} +vpmovsdw %xmm19, (%rax) 
{k1} +vpmovsdw %xmm19, %xmm16 {k1}{z} + +vpmovsdw %ymm19, %xmm16 +vpmovsdw %ymm19, (%rax) +vpmovsdw %ymm19, %xmm16 {k1} +vpmovsdw %ymm19, (%rax) {k1} +vpmovsdw %ymm19, %xmm16 {k1}{z} + +vpmovsqb %xmm19, %xmm16 +vpmovsqb %xmm19, (%rax) +vpmovsqb %xmm19, %xmm16 {k1} +vpmovsqb %xmm19, (%rax) {k1} +vpmovsqb %xmm19, %xmm16 {k1}{z} + +vpmovsqb %ymm19, %xmm16 +vpmovsqb %ymm19, (%rax) +vpmovsqb %ymm19, %xmm16 {k1} +vpmovsqb %ymm19, (%rax) {k1} +vpmovsqb %ymm19, %xmm16 {k1}{z} + +vpmovsqd %xmm19, %xmm16 +vpmovsqd %xmm19, (%rax) +vpmovsqd %xmm19, %xmm16 {k1} +vpmovsqd %xmm19, (%rax) {k1} +vpmovsqd %xmm19, %xmm16 {k1}{z} + +vpmovsqd %ymm19, %xmm16 +vpmovsqd %ymm19, (%rax) +vpmovsqd %ymm19, %xmm16 {k1} +vpmovsqd %ymm19, (%rax) {k1} +vpmovsqd %ymm19, %xmm16 {k1}{z} + +vpmovsqw %xmm19, %xmm16 +vpmovsqw %xmm19, (%rax) +vpmovsqw %xmm19, %xmm16 {k1} +vpmovsqw %xmm19, (%rax) {k1} +vpmovsqw %xmm19, %xmm16 {k1}{z} + +vpmovsqw %ymm19, %xmm16 +vpmovsqw %ymm19, (%rax) +vpmovsqw %ymm19, %xmm16 {k1} +vpmovsqw %ymm19, (%rax) {k1} +vpmovsqw %ymm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %xmm19 vpmovsxbd (%rax), %xmm19 vpmovsxbd %xmm16, %xmm19 {k1} @@ -1257,6 +1377,66 @@ vpmovsxwq (%rax), %ymm19 {k1} vpmovsxwq %xmm16, %ymm19 {z}{k1} vpmovsxwq (%rax), %ymm19 {z}{k1} +vpmovusdb %xmm19, %xmm16 +vpmovusdb %xmm19, (%rax) +vpmovusdb %xmm19, %xmm16 {k1} +vpmovusdb %xmm19, (%rax) {k1} +vpmovusdb %xmm19, %xmm16 {k1}{z} + +vpmovusdb %ymm19, %xmm16 +vpmovusdb %ymm19, (%rax) +vpmovusdb %ymm19, %xmm16 {k1} +vpmovusdb %ymm19, (%rax) {k1} +vpmovusdb %ymm19, %xmm16 {k1}{z} + +vpmovusdw %xmm19, %xmm16 +vpmovusdw %xmm19, (%rax) +vpmovusdw %xmm19, %xmm16 {k1} +vpmovusdw %xmm19, (%rax) {k1} +vpmovusdw %xmm19, %xmm16 {k1}{z} + +vpmovusdw %ymm19, %xmm16 +vpmovusdw %ymm19, (%rax) +vpmovusdw %ymm19, %xmm16 {k1} +vpmovusdw %ymm19, (%rax) {k1} +vpmovusdw %ymm19, %xmm16 {k1}{z} + +vpmovusqb %xmm19, %xmm16 +vpmovusqb %xmm19, (%rax) +vpmovusqb %xmm19, %xmm16 {k1} +vpmovusqb %xmm19, (%rax) {k1} +vpmovusqb %xmm19, %xmm16 
{k1}{z} + +vpmovusqb %ymm19, %xmm16 +vpmovusqb %ymm19, (%rax) +vpmovusqb %ymm19, %xmm16 {k1} +vpmovusqb %ymm19, (%rax) {k1} +vpmovusqb %ymm19, %xmm16 {k1}{z} + +vpmovusqd %xmm19, %xmm16 +vpmovusqd %xmm19, (%rax) +vpmovusqd %xmm19, %xmm16 {k1} +vpmovusqd %xmm19, (%rax) {k1} +vpmovusqd %xmm19, %xmm16 {k1}{z} + +vpmovusqd %ymm19, %xmm16 +vpmovusqd %ymm19, (%rax) +vpmovusqd %ymm19, %xmm16 {k1} +vpmovusqd %ymm19, (%rax) {k1} +vpmovusqd %ymm19, %xmm16 {k1}{z} + +vpmovusqw %xmm19, %xmm16 +vpmovusqw %xmm19, (%rax) +vpmovusqw %xmm19, %xmm16 {k1} +vpmovusqw %xmm19, (%rax) {k1} +vpmovusqw %xmm19, %xmm16 {k1}{z} + +vpmovusqw %ymm19, %xmm16 +vpmovusqw %ymm19, (%rax) +vpmovusqw %ymm19, %xmm16 {k1} +vpmovusqw %ymm19, (%rax) {k1} +vpmovusqw %ymm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %xmm19 vpmovzxbd (%rax), %xmm19 vpmovzxbd %xmm16, %xmm19 {k1} @@ -2784,6 +2964,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 7 27 1.33 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 4 20 1.00 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 4 20 1.00 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 2 2 1.00 vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovdb %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovdw %ymm19, (%rax) +# 
CHECK-NEXT: 2 6 1.00 vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: 4 20 1.00 * vpmovqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovqb %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: 3 12 0.50 * vpmovqd %xmm19, (%rax) +# CHECK-NEXT: 1 1 0.50 vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 3 12 0.50 * vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: 3 12 1.00 * vpmovqd %ymm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 3 14 1.00 * vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovqw %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovqw %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovsdb %xmm19, %xmm16 
{%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: 4 20 1.00 * vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: 2 2 1.00 vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 2 1.00 vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: 
2 4 1.00 vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm16, %xmm19 {%k1} @@ -2844,6 +3124,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 11 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: 1 3 1.00 vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: 4 20 1.00 * vpmovusqb 
%xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: 2 2 1.00 vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 2 1.00 vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: 2 4 1.00 vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 13 1.00 * vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.00 vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: 4 12 1.00 * vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: 2 6 1.00 vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 15 1.00 * vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 6 1.00 vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm16, %xmm19 {%k1} @@ -3274,1531 +3604,1681 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 404.53 412.53 329.00 329.00 46.00 797.53 3.20 46.00 46.00 46.00 3.20 329.00 - +# CHECK-NEXT: 
404.53 485.03 329.00 329.00 76.00 1015.03 3.20 76.00 76.00 76.00 329.00 3.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to4}, 
%ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - 
vaddps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd 
$1, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %ymm16, 
%ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x4 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x4 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x4 (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x4 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x4 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x4 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x4 (%rax), %ymm19 +# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x4 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x4 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x4 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x4 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x4 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastsd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastsd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastsd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastsd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 
- - - - 1.00 - - - - - - - vbroadcastss %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to4}, %ymm1, %k2 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps 
(%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm19 
{%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - 
vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 
0.33 - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm19 {%k1} -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqy 
(%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} 
{z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm19 {%k1} -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 
- - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm19 {%k1} -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqy (%rax), 
%xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm16, %ymm17, %ymm19 {%k1} -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vfmadd213pd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 
{%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps 
(%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - - 2.67 - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherqps (%rax,%ymm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherdps (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vgatherqps (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - 2.67 - - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherqpd 
(%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherqps (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherdps (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vgatherqps (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to8}, 
%ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm17, 
%ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - 
- - - - 0.33 - - vminps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %xmm16, %xmm19 
{%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa32 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - 
vmovdqa32 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa32 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa32 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa64 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa64 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %xmm16, %xmm19 {%k1} -# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa64 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa64 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu32 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu32 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %xmm19 {%k1} # 
CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu32 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu32 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu64 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu64 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu64 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu64 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - {evex} vmovntdqa (%rax), %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - {evex} vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - 
- - - - 0.33 - - {evex} vmovntdqa (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - {evex} vmovntdqa (%rax), %ymm0 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), 
%xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %xmm19 {%k1} 
{z} # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovups %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovups %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %ymm19 # CHECK-NEXT: - 
- - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd 
(%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), 
%xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - 
vpaddd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm16, 
%xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), 
%xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 
0.33 - - - - - 0.33 - vpbroadcastq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, 
%k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd 
(%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - 
- - - 1.00 - - - - - - - vpcmpgtd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %k2 +# CHECK-NEXT: 
- - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, 
%xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - 
- - - - 0.33 - vpcmpequd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to4}, 
%ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd 
$0, (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to2}, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to4}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vpermilpd $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - 
- - 0.33 - vpermilpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to4}, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, 
(%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to8}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vpermilps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to4}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %ymm17, 
%ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to4}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 
- 1.00 - - - - - 0.33 - vpermq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - - 2.67 - vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherdq 
(%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - 2.67 - - vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdb %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdb %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdw %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 
0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdw %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqb %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqb %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpmovqd %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vpmovqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - 
vpmovqw %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqw %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - 
vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 
- - - vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), 
%xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), 
%ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - 
- - - - - - vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovusqw %ymm19, (%rax) {%k1} +# 
CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - 
vpmovzxbq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm19 # CHECK-NEXT: 
- - - - - 1.00 - - - - - - - vpmovzxdq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - 
vpmovzxwq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - 
vpmulld (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 
{%k1} {z} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, 
%xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to4}, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to8}, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to8}, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 
0.33 - - vpshufd $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to4}, 
%xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - 
- 1.00 - - - - - - - vptestmq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %xmm1, %k2 +# CHECK-NEXT: 
- - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to4}, %xmm17, 
%xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - 
vpunpckldq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq 
(%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vscatterqps %xmm1, 
(%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, 
%ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %ymm16, %ymm17, 
%ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 
- 0.33 - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm16, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to2}, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to4}, %ymm19 # CHECK-NEXT: 1.00 
- - - - - - - - - - - - vsqrtpd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm16, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to4}, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - 
- - 0.33 - - vsqrtps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 
0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd 
(%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), 
%ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 
1.00 - - - - - - - vunpckhps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm17, %ymm19 {%k1} {z} -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), 
%ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps 
%xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s index 2bfc313877d1e..f24d20b19c69a 100644 --- 
a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s @@ -104,43 +104,43 @@ vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.00 - 8.00 8.00 - - - - - - - 8.00 - +# CHECK-NEXT: 36.00 - 8.00 8.00 - - - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusds %zmm16, %zmm17, %zmm19 
-# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusds %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusds %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 
{%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssds %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssds %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssds %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 
0.33 - - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s index 958193d0b4497..2b661601690a9 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s @@ -180,79 +180,79 @@ vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.00 36.00 16.00 16.00 - - - - - - - 16.00 - +# CHECK-NEXT: 36.00 36.00 16.00 16.00 - - - - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd 
(%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm17, %xmm19 
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- 0.33 - - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s index 0c28cf8400387..35c5137380fb7 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s @@ -33,9 +33,9 @@ vpclmulqdq $11, (%rax), %zmm17, %zmm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s index 700cb1a03db50..1b5597aeaa57f 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s @@ -38,11 +38,11 @@ vpclmulqdq $11, (%rax), %ymm17, %ymm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.67 0.67 - 4.00 - - - - - 0.67 - +# CHECK-NEXT: - - 0.67 0.67 - 4.00 - - - - 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vpclmulqdq $11, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %ymm17, %ymm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s index 174540dc972bd..4587dbd97e93f 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s @@ -70,25 +70,25 @@ vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 4.00 4.00 - 18.00 - - - - - 4.00 - +# CHECK-NEXT: - - 4.00 4.00 - 18.00 - - - - 4.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to16}, %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to16}, %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to16}, %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - 
- 0.33 - - vpopcntd (%rdi){1to16}, %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to8}, %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to8}, %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to8}, %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to8}, %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s index 9b5ebc7e0e9ea..d425c64196fef 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s +++ 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s @@ -112,43 +112,43 @@ vpopcntq (%rdi){1to4}, %ymm0 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 8.00 8.00 - 36.00 - - - - - 8.00 - +# CHECK-NEXT: - - 8.00 8.00 - 36.00 - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to4}, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to4}, %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to4}, %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to4}, %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to4}, %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to4}, %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to8}, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), 
%ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to8}, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to8}, %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to8}, %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to8}, %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to8}, %ymm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to2}, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to2}, %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to2}, %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to2}, %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to2}, %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to2}, %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to4}, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to4}, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to4}, %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to4}, %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to4}, %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to4}, %ymm0 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s index 9e0ca1373214e..b8fec0d6d8341 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s @@ -58,19 +58,19 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - - 2.00 - +# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - 2.00 - - # CHECK: Resource 
pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s index 58ab6f9e8f298..2b64fedbd1f4e 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s +++ 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s @@ -68,23 +68,23 @@ vpdpwssds (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - - 2.67 - +# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vpdpwssds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s index 253cbbcafee6d..9f6cf8cefd8ac 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s @@ -93,33 +93,33 @@ tzcnt (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 5.33 4.33 - +# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 4.33 5.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnl %eax, %ebx, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnl %eax, %ebx, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnl (%rax), %ebx, %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnq %rax, %rbx, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnq %rax, %rbx, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 
- 0.33 - bextrq %rax, (%rbx), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsil %eax, %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrq %rax, (%rbx), %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsil %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsil (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsiq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsiq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsiq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrq (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntw %ax, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - 
tzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s index ed6ab74a9d4e8..0ba2cd6203d2d 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s @@ -108,39 +108,39 @@ shrx %rax, (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 0.40 5.33 - +# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 5.33 0.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhil %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhil %eax, (%rbx), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhil %eax, (%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhiq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhiq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mulxl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulxq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulxq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepl (%rax), %ebx, %ecx # CHECK-NEXT: - 
1.00 - - - - - - - - - - - pdepq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxl $1, %eax, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxl $1, (%rax), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxl $1, (%rax), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxq $1, %rax, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxq $1, (%rax), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxq $1, (%rax), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - 
shlxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxq %rax, (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s index ba11eea9ce638..e7a06f8051158 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s @@ -31,8 +31,8 @@ clflushopt (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clflushopt (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clflushopt (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s index 06192b2184ee8..f5f7ce7d8f52a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s @@ -31,8 +31,8 @@ clwb (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # 
CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clwb (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clwb (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s index f7b17a75b5566..a305a4badaca5 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s @@ -226,7 +226,7 @@ cmovgq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - - 16.00 - +# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -246,22 +246,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgw %si, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsw (%rax), %di -# CHECK-NEXT: 
0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbl %esi, %edi @@ -278,22 +278,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlel 
%esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgl %esi, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovael (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoval (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovll (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovael (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoval (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsl (%rax), %edi +# 
CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovll (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbq %rsi, %rdi @@ -310,19 +310,19 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgeq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovleq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgq %rsi, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoveq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovneq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - 
- - - 0.33 - cmovgeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovleq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoveq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovneq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovleq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s index 07711da6b4886..2af2da9a6fa02 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s @@ -37,11 +37,11 @@ lock cmpxchg16b (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 4.40 1.33 - +# CHECK-NEXT: 21.40 10.40 
1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 1.33 4.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - cmpxchg16b (%rax) -# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - lock cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - lock cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - lock cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - lock cmpxchg16b (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s index 2c05d5ae3b888..eaf6190e11391 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s @@ -48,14 +48,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 - 0.67 - +# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %ymm2 
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - 0.50 - - 0.50 0.50 0.50 - - - vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s index a01ac71efec20..97d2e70fcfd43 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s @@ -508,199 +508,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - - 32.00 - +# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - 32.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps 
%ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vfmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %ymm1, 
%ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %ymm0, 
%ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vfnmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132sd 
(%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %xmm0, %xmm1, 
%xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s index 097af7bccaf74..84e2819559362 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s @@ -43,13 +43,13 @@ gf2p8mulb (%rax), %xmm1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineinvqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineinvqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineinvqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8mulb %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8mulb (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8mulb (%rax), %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s index 633c5994ae530..e05a540eccb43 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s @@ -301,142 +301,142 @@ lea 1024(%rax, %rbx, 2), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] 
-# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - 9.00 - - +# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - - 9.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 0, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 0, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 0, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 0, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # 
CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx,2), %cx # 
CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - 
- - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 
0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 
1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 
0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s index e6a480ca72de6..cb256ab1f5df8 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s @@ -43,13 +43,13 @@ lzcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - 
- - - 0.33 - lzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s index 40f046388cbf1..9fe4880b7c613 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s @@ -287,112 +287,112 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 - 15.33 - +# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 15.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 5.50 - - - - 4.00 0.50 - - - - - - emms # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %mm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %mm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %mm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %mm0, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - 
- - packsswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packsswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packsswb (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packssdw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packssdw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packssdw (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packuswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packuswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packuswb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddw (%rax), %mm2 
+# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pand %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pand (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pand (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pandn %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pandn (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pandn (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddwd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - 
- - - - - - pmulhw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmullw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - por %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - por (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - por (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - 
- psrld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubw 
%mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhdq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhdq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhdq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhwd (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckldq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckldq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckldq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklwd (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pxor %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pxor (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pxor (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s index c9a6eda7df95e..87fc2921ccc87 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s @@ -43,13 
+43,13 @@ movbe (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 0.20 1.00 - +# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 1.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbew %cx, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - movbew (%rax), %cx +# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - movbew (%rax), %cx # CHECK-NEXT: - 1.00 - - 0.50 - - 0.50 0.50 0.50 - - - movbel %ecx, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - movbel (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - movbel (%rax), %ecx # CHECK-NEXT: 0.50 1.00 - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbeq %rcx, (%rax) -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - movbeq (%rax), %rcx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - movbeq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s index 5ce1dc32042b2..f16f2516d4c25 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s @@ -33,9 +33,9 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - pclmulqdq $11, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pclmulqdq $11, (%rax), %xmm2 +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pclmulqdq $11, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s index cf827bd06cc59..d82c3da8b06c0 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s @@ -43,13 +43,13 @@ popcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s index 590ed99d2bbea..292f545e60d4d 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s @@ -33,9 +33,9 @@ prefetchw (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.67 0.67 - - - - - - - 0.67 - +# CHECK-NEXT: - - 0.67 0.67 - - - - - - 0.67 - - # CHECK: 
Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetch (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchw (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetch (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchw (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s index ba91084994055..87bee6e8109a7 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s @@ -35,10 +35,10 @@ rdrand %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.80 1.00 - +# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.00 1.80 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandw %ax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandl %eax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandq %rax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandw %ax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandl %eax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandq %rax diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s index 9b0904200c9cf..155486864472e 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s @@ -35,10 +35,10 @@ rdseed %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] 
[3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - - 1.00 - +# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedw %ax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedl %eax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedq %rax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedw %ax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedl %eax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedq %rax diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s index 348eb91753d92..4295a872c25bf 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s @@ -336,131 +336,131 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 30.50 1.00 5.00 5.00 5.00 - 19.33 - +# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 30.50 1.00 5.00 5.00 5.00 19.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnps %xmm0, 
%xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comiss (%rax), %xmm1 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvtps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvtps2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - cvtsi2ss %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2ssl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtsi2ssq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - 
- - - cvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %rcx # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvttps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvttps2pi (%rax), %mm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divss (%rax), %xmm2 -# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - - 0.33 - ldmxcsr (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divss (%rax), %xmm2 +# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - 0.33 - - ldmxcsr (%rax) # CHECK-NEXT: 2.00 - - - 0.50 - - 0.50 0.50 0.50 - - - maskmovq %mm0, %mm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
maxss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movaps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movhlps %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movlhps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhps (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskps %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntq %mm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movss %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movups %xmm0, 
%xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movups (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pextrw $1, %mm0, %ecx # CHECK-NEXT: - - - - - 2.00 - - - - - - - pinsrw $1, %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %mm2 # 
CHECK-NEXT: 1.00 - - - - - - - - - - - - pminsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pminub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %mm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhuw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht0 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht1 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht2 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchnta (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht0 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht1 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht2 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchnta (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pshufw $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpss (%rax), %xmm2 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - sfence # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufps $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufps $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtss (%rax), %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - stmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhps %xmm0, %xmm2 
-# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s index 10e53a8df89f7..2b37853229def 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s @@ -692,96 +692,96 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 107.70 99.20 39.00 39.00 9.00 71.70 1.20 7.50 7.50 8.00 0.20 39.00 - +# CHECK-NEXT: 107.70 99.20 39.00 39.00 9.00 71.70 1.20 7.50 7.50 8.00 39.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - 
andnpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andpd (%rax), %xmm2 -# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 0.20 - - clflush (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andpd (%rax), %xmm2 +# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 - 0.20 - clflush (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comisd (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2pi (%rax), %mm2 # 
CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtss2sd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2sd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divsd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lfence # CHECK-NEXT: - - - - 1.50 - - - - 0.50 - - - maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 
0.50 0.50 - - - - - - - - - - - maxpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxsd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - mfence # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movapd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - movdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqu (%rax), %xmm2 +# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqu (%rax), %xmm2 # CHECK-NEXT: 0.83 0.33 - - - 0.83 - - - - - - - movdq2q %xmm0, %mm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskpd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntil %eax, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntiq %rax, (%rax) @@ -789,177 +789,177 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntpd %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %xmm0, (%rax) # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - movq2dq %mm0, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movupd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulpd %xmm0, %xmm2 
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packssdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packssdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packssdw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packsswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packsswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packsswb (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packuswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packuswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packuswb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pand %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pand (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pand (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pandn %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pandn (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pandn (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgw %xmm0, %xmm2 -# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrw $1, %eax, %xmm0 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrw $1, (%rax), %xmm0 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrw $1, (%rax), %xmm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), 
%xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %xmm0, %ecx # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmullw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmuludq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuludq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - pmuludq (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - por %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - por (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - por (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pslld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - pslld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pslldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllw 
(%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrad $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrad %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psraw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psraw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - psrldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - 
psubd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 
0.33 - - punpckhdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckldq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckldq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckldq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklwd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pxor %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pxor (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pxor (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtpd (%rax), 
%xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subsd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorpd (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s index daa8edd85b37c..445015d9c430a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s @@ -82,28 +82,28 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] 
[1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - - 3.33 - +# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - lddqu (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - lddqu (%rax), %xmm2 # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - monitor # CHECK-NEXT: - - - - - 1.00 - - - - - - - movddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 
- - - - - - 0.33 - - movddup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsldup (%rax), %xmm2 # CHECK-NEXT: 1.75 1.75 - - - 2.75 3.75 - - - - - - mwait diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s index 02e212431cc0d..1dc78904ff963 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s @@ -269,37 +269,37 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 - 14.67 - +# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 14.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendpd $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendps $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendps $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendps $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - 
- 0.33 - - blendvpd %xmm0, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - dppd $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - dpps $22, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - insertps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - insertps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - insertps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movntdqa (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - mpsadbw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packusdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packusdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packusdw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pblendvb %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pblendvb 
%xmm0, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pblendw $11, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pblendw $11, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pblendw $11, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqq (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrd $1, %xmm0, %ecx @@ -308,64 +308,64 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrq $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - phminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - phminposuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrb $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrd $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrd $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrq $1, %rax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrq $1, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsb (%rax), %xmm2 +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwd 
(%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuldq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuldq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuldq (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - pmulld %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - pmulld (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - pmulld (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - ptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - ptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - ptest (%rax), %xmm1 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundsd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s index f08243fdea724..899538f44f53d 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s @@ -78,27 +78,27 @@ pcmpgtq (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - - 3.33 - +# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32l %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32l (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32l (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32w %ax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32w (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32w (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32q %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32q (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32q (%rax), %rcx # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - pcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - pcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - 
pcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - pcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pcmpgtq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pcmpgtq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pcmpgtq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s index 565eaaab6c513..4578e37127595 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s @@ -188,71 +188,71 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - - 10.67 - +# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - pabsb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phaddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phaddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phaddsw (%rax), %mm2 # 
CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phaddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phaddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phsubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phsubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phsubsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phsubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phsubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - 
phsubw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhrsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pshufb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufb (%rax), %mm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufb %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufb (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 
- - - - - - - - - - - psignd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s index 3cdda14f9d45f..919cbe60a88db 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s @@ -48,15 +48,15 @@ vaesenclast (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - - 1.33 - +# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %ymm1, %ymm3 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s index 1f4745a2c7b8f..8375b08f95a6b 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s @@ -33,9 +33,9 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %ymm1, %ymm3 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s index 195908c756b84..834dfb8525bda 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s @@ -64,7 +64,7 @@ salc # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.60 0.33 - +# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.33 0.60 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] 
[6] [7] [8] [9] [10] [11] [12] Instructions: @@ -79,5 +79,5 @@ salc # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - daa # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - das # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - into -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - leave # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - salc diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s index 1720b2d60ba90..d41abf60de183 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s @@ -1965,419 +1965,419 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 191.33 213.00 - +# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 213.00 191.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 
0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw 
$7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $7, %edi -# 
CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq 
$665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 
0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $665536, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcq (%rax), %rdi +# 
CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - 
- 0.20 0.20 - - - - 0.20 - addl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - addq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 
0.33 - andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 
0.33 - lock andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 
- andb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - 
andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfw (%rax), %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrw (%rax), %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrl %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfl (%rax), %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrl (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - 
- bsfq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfq (%rax), %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bswapl %eax # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bswapq %rax # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw %si, %di -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsw %si, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 
- lock btsw %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw $7, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsw $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl %esi, %edi -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsl %esi, (%rax) -# 
CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsl %esi, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsl %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl $7, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrl $7, (%rax) +# 
CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsl $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq %rsi, %rdi -# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 1.00 0.33 - btq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btsq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btsq %rsi, (%rax) +# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 0.33 1.00 - btq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btsq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btsq %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - 
- - btcq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq $7, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsq $7, (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cbtw # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cwtl # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cltq -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cwtd +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cwtd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cltd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cqto # CHECK-NEXT: - - - - - - - - - - - - - clc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cld -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmc -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - 
- - 0.20 0.33 - cmpb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 
0.20 0.20 - - - 0.20 0.33 - cmpq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq (%rax), %rdi -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsb %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsw %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsl %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgb %cl, %bl -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgw %cx, %bx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgq %rcx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cld +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmc +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb 
%sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 
- - - 0.33 0.20 - cmpq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsb %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsw %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsl %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsq %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgb %cl, %bl +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgw %cx, %bx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgl %ecx, %ebx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgq %rcx, %rbx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 7.50 6.50 - - 0.50 5.00 5.00 0.50 0.50 0.50 - - - cpuid -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decw (%rax) -# CHECK-NEXT: 0.20 
0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - decq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decq (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 
0.33 0.20 - divw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - divq (%rax) -# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 - 4.67 - enter $7, $4095 +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - divq (%rax) +# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 4.67 - - enter $7, $4095 # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - idivq (%rax) +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - idivq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - imulw %di -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - imulw (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - imulw %di +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - imulw (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulw 
(%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $511, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $511, (%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $7, (%rax), %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - imull %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - imull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulw (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $511, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $511, (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $7, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $7, (%rax), %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - imull %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - imull (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $665536, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $665536, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $665536, (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $7, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $7, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $7, (%rax), %edi # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - imulq %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - imulq (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - imulq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq 
(%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $665536, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $665536, (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $665536, (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $7, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $7, (%rax), %rdi -# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 1.87 2.67 - inb $7, %al -# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 1.87 2.33 - inb %dx, %al -# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 1.67 2.33 - inw $7, %ax -# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 1.80 2.33 - inw %dx, %ax -# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 1.87 3.33 - inl $7, %eax -# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 2.47 3.67 - inl %dx, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incl (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $7, (%rax), %rdi +# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 2.67 1.87 - inb $7, %al +# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 2.33 1.87 - inb %dx, %al +# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 2.33 1.67 
- inw $7, %ax +# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 2.33 1.80 - inw %dx, %ax +# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 3.33 1.87 - inl $7, %eax +# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 3.67 2.47 - inl %dx, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - incq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incq (%rax) -# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 1.20 2.67 - insb %dx, %es:(%rdi) -# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 1.13 3.00 - insw %dx, %es:(%rdi) -# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 1.00 3.67 - insl %dx, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incq (%rax) +# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 2.67 1.20 - insb %dx, %es:(%rdi) +# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 3.00 1.13 - insw %dx, %es:(%rdi) +# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 
0.50 3.67 1.00 - insl %dx, %es:(%rdi) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - int $7 -# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 1.47 - - invlpg (%rax) +# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 - 1.47 - invlpg (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - invlpga # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lahf -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - leave -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsb (%rsi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsw (%rsi), %ax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsl (%rsi), %eax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsq (%rsi), %rax -# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - 0.40 - - loop 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loope 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loopne 0 -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsb (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsw (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsl (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsq (%rsi), %es:(%rdi) -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbw %al, %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbw %al, %di +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsb (%rsi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsw (%rsi), %ax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsl (%rsi), %eax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsq (%rsi), %rax +# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - - 0.40 - loop 0 +# CHECK-NEXT: 
3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loope 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loopne 0 +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsb (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsw (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsl (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsq (%rsi), %es:(%rdi) +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbw %al, %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbw %al, %di # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - movsbw (%rax), %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - movzbw (%rax), %di -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbl %al, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbl %al, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbq %al, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbq %al, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswl %ax, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwl %ax, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswq %ax, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwq %ax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movslq 
%eax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movslq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - movzbw (%rax), %di +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbl %al, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbl %al, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbq %al, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbq %al, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswl %ax, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwl %ax, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswq %ax, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwq %ax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movslq %eax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - mulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - mulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - mulw %si -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - mulw (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mull %edx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - mulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - mulw %si +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 
0.90 - - - 0.33 0.40 - mulw (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mull %edx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mull (%rax) # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulq %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulq (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negq (%r10) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulq (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - 
negl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negq (%r10) # CHECK-NEXT: - - - - - - - - - - - - - nop # CHECK-NEXT: - - - - - - - - - - - - - nopw %di # CHECK-NEXT: - - - - - - - - - - - - - nopw (%rcx) @@ -2385,303 +2385,303 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - - nopl (%r8) # CHECK-NEXT: - - - - - - - - - - - - - nopq %rdx # CHECK-NEXT: - - - - - - - - - - - - - nopq (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notq (%r10) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock 
orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 
0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orq (%rax), %rdi -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.00 1.67 - outb %al, $7 -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.00 1.67 - outb %al, %dx -# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 0.80 2.33 - outw %ax, $7 -# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 1.20 2.33 - outw %ax, %dx -# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 0.80 3.00 - outl %eax, $7 -# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 1.20 3.00 - outl %eax, %dx -# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 1.20 2.33 - outsb (%rsi), %dx -# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 1.00 2.67 - outsw (%rsi), %dx -# CHECK-NEXT: 22.20 17.20 3.33 3.33 
0.50 21.20 15.20 0.50 0.50 0.50 1.20 3.33 - outsl (%rsi), %dx +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notq (%r10) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - 
- - 0.20 - orq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orq (%rax), %rdi +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.67 1.00 - outb %al, $7 +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.67 1.00 - outb %al, %dx +# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 2.33 0.80 - outw %ax, $7 +# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 2.33 1.20 - outw %ax, %dx +# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 3.00 0.80 - outl %eax, $7 +# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 3.00 1.20 - outl %eax, %dx +# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 2.33 1.20 - outsb (%rsi), %dx +# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 2.67 1.00 - outsw (%rsi), %dx +# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 3.33 1.20 - outsl (%rsi), %dx # CHECK-NEXT: 0.50 - - - - 1.00 0.50 - - - - - - pause -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb $7, %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb $7, %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 
0.33 - rclb $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb $7, (%rax) -# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - 0.40 - - rclb %cl, %dil -# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - 0.60 - - rcrb %cl, %dil -# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.20 0.33 - rclb %cl, (%rax) -# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.40 0.33 - rcrb %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw $7, %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw $7, %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclw %cl, %di -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrw %cl, %di -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclw %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrw %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll $7, %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl $7, %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 
0.50 0.50 0.50 0.20 0.33 - rcrl $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcll %cl, %edi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrl %cl, %edi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcll %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrl %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq $7, %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq $7, %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclq %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrq %cl, (%rax) -# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - 0.33 - - rdmsr -# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - 1.80 - - rdpmc -# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - 1.00 - - rdtsc +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb $7, %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb $7, %dil +# CHECK-NEXT: 1.20 
0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb $7, (%rax) +# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - - 0.40 - rclb %cl, %dil +# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - - 0.60 - rcrb %cl, %dil +# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.33 0.20 - rclb %cl, (%rax) +# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.33 0.40 - rcrb %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw $7, %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw $7, %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclw %cl, %di +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrw %cl, %di +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclw %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrw %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll $7, %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl $7, %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll $7, (%rax) 
+# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcll %cl, %edi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrl %cl, %edi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcll %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrl %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq $7, %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq $7, %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclq %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrq %cl, (%rax) +# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - - 0.33 - rdmsr +# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - - 1.80 - rdpmc +# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - - 1.00 - rdtsc # CHECK-NEXT: 7.50 5.33 - - - 4.00 4.17 - - - - - - rdtscp # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb (%rax) +# 
CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb $7, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb $7, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw $7, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw $7, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %cl, %di 
-# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll $7, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl $7, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 
- 1.00 0.50 0.50 0.50 0.33 - - rolq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq $7, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq $7, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sahf # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb $7, %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb $7, 
(%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrb %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw $7, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 
0.50 - 0.33 - sarw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrw %cl, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrw %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl $7, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 
0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarl %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarl %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrl %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq $7, %rdi -# 
CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrq %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $7, 
(%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 
0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 
0.50 0.20 0.33 - sbbl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - 
- sbbq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbq (%rax), %rdi -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasb %es:(%rdi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasw %es:(%rdi), %ax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasl %es:(%rdi), %eax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasq %es:(%rdi), %rax +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasb %es:(%rdi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasw %es:(%rdi), %ax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasl %es:(%rdi), %eax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasq %es:(%rdi), %rax # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - seto %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - seto (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setno %al @@ -2714,171 +2714,171 @@ xorq (%rax), %rdi # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setg (%rax) # 
CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setle %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setle (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldw %cl, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdw %cl, %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldw $7, %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldw $7, %si, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdw $7, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldl %cl, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdl %cl, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldw $7, %si, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdw $7, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldl %cl, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 
0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdl %cl, %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldl $7, %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdl $7, %esi, %edi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldl $7, %esi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdl $7, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldq %cl, %rsi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldl $7, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdl $7, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdq %cl, %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldq $7, %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdq $7, %rsi, %rdi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - stc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - std -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosb %al, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosw %ax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 
0.40 - - stosl %eax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosq %rax, %es:(%rdi) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $665536, (%rax) -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $665536, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - stc +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - std +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosb %al, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosw %ax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosl %eax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosq %rax, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb $7, (%rax) +# CHECK-NEXT: 0.20 
0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $7, (%rax) +# CHECK-NEXT: 
0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - subq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 
0.20 0.20 - - - 0.20 0.33 - testw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %dil +# CHECK-NEXT: 0.20 
0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq %rsi, (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - ud2 # CHECK-NEXT: 52.00 31.50 - - 0.50 27.00 31.50 0.50 0.50 0.50 - - - wrmsr -# CHECK-NEXT: 
0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddb %bl, %cl -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddb %bl, (%rcx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddb %bl, (%rcx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddw %bx, %cx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddw %ax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddl %ebx, %ecx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddl %eax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddq %rbx, %rcx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddq %rax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgb %bl, %cl -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgb %bl, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgb %bl, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %ax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %cx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgw %ax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %eax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %ecx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgl %eax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgl %eax, 
(%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rcx -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - xchgq %rax, (%rbx) -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - lock xchgq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - xlatb -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 
0.20 0.20 - - - 0.20 0.33 - xorw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 - - - 
0.60 0.60 - - - - 0.60 - xaddb %bl, %cl +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddb %bl, (%rcx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddb %bl, (%rcx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddw %bx, %cx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddw %ax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddl %ebx, %ecx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddl %eax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddq %rbx, %rcx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddq %rax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgb %bl, %cl +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgb %bl, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgb %bl, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %ax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %cx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgw %ax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %eax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %ecx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgl %eax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgl %eax, (%rbx) +# 
CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rcx +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - xchgq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - lock xchgq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - xlatb +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - 
- - 0.33 0.20 - xorw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorq (%rax), %rdi diff --git 
a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s index e54f93ef8f535..042740c91d808 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s @@ -372,7 +372,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 105.00 36.50 70.33 70.33 78.50 133.00 48.50 19.00 19.00 32.50 1.00 27.33 7.00 +# CHECK-NEXT: 105.00 36.50 70.33 70.33 78.50 133.00 48.50 19.00 19.00 32.50 27.33 1.00 7.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -380,12 +380,12 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - fabs # CHECK-NEXT: - - - - - 1.00 - - - - - - - fadd %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fadd %st(2), %st -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fadds (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - faddl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fadds (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - faddl (%ecx) # CHECK-NEXT: - - - - - 1.00 - - - - - - - faddp %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - faddp %st, %st(2) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fiadds (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fiaddl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fiadds (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fiaddl (%ecx) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fbld (%ecx) # CHECK-NEXT: - - - - 1.50 - - - - 0.50 - - - fbstp (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fchs @@ -400,12 +400,12 @@ fyl2xp1 # CHECK-NEXT: - 1.00 - - - - - - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcom %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcom %st(3) -# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - - 0.33 - fcoms (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcoml (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcoms (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcoml (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcomp %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcomp %st(3) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcomps (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcompl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcomps (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcompl (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fcompp # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcomi %st(3), %st # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcompi %st(3), %st @@ -413,28 +413,28 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fdecstp # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdiv %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdiv %st(2), %st -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivp %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivp %st, %st(2) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivr %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivr %st(2), %st -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivrs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivrl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivrs (%ecx) +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivrl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivrp %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivrp %st, %st(2) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivrs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivrl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivrs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivrl (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - ffree %st(0) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficoms (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficoml (%eax) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficomps (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficompl (%eax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - filds (%edx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fildl (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fildll (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficoms (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficoml (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficomps (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficompl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - filds (%edx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fildl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fildll (%eax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - fincstp # CHECK-NEXT: 3.00 1.50 - - - 9.00 1.50 - - - - - - fninit # CHECK-NEXT: - - - - 1.50 1.00 - - - 0.50 - - - fists (%edx) @@ -446,11 +446,11 @@ fyl2xp1 # CHECK-NEXT: - - - - 1.50 1.00 - - - 0.50 - - - fisttpl (%ecx) # CHECK-NEXT: - - - - 1.50 1.00 - - - 0.50 - - - fisttpll (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fld %st(0) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - flds (%edx) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - fldl (%ecx) -# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - fldt (%eax) -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - fldcw (%eax) -# CHECK-NEXT: 9.50 - 22.17 22.17 - 2.50 5.00 - - - - 2.67 - fldenv (%eax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - flds (%edx) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - fldl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - fldt (%eax) +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - fldcw (%eax) +# CHECK-NEXT: 9.50 - 22.17 22.17 - 2.50 5.00 - - - 2.67 - - fldenv (%eax) # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fld1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fldl2e # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fldl2t @@ -460,12 +460,12 @@ fyl2xp1 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - fldz # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul %st(2), %st -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fmuls (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fmull (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fmuls (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fmull (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmulp %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmulp %st, %st(2) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fimuls (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fimull (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fimuls (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fimull (%eax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - fnop # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fpatan # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fprem @@ -493,20 +493,20 @@ fyl2xp1 # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fnsave (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsub %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsub %st(2), %st -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubs 
(%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubl (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubp %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubp %st, %st(2) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubl (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubr %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubr %st(2), %st -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubrs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubrl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubrs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubrl (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubrp %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubrp %st, %st(2) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubrs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubrl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubrs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubrl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - ftst # CHECK-NEXT: - - - - - 1.00 - - - - - - - fucom %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fucom %st(3) @@ -519,8 +519,8 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - fxam # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(1) # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(3) -# CHECK-NEXT: 5.50 0.50 34.50 34.50 - 1.00 3.00 - - - - 11.00 - fxrstor (%eax) -# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 1.00 0.67 - fxsave (%eax) +# CHECK-NEXT: 5.50 0.50 34.50 34.50 - 1.00 3.00 - - - 11.00 - - fxrstor 
(%eax) +# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 0.67 1.00 - fxsave (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fxtract # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2x # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2xp1 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s index 824e8d3728684..7ba16d35730e0 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s @@ -43,12 +43,12 @@ xsetbv # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 61.50 61.17 23.00 23.00 0.50 43.33 56.67 0.50 0.50 0.50 6.33 2.00 - +# CHECK-NEXT: 61.50 61.17 23.00 23.00 0.50 43.33 56.67 0.50 0.50 0.50 2.00 6.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - 1.40 - - xgetbv -# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - - 0.33 - xrstor (%rax) -# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - - 0.33 - xrstors (%rax) -# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 - 1.33 - xsave (%rax) -# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - 4.93 - - xsetbv +# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - - 1.40 - xgetbv +# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - 0.33 - - xrstor (%rax) +# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - 0.33 - - xrstors (%rax) +# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 1.33 - - xsave (%rax) +# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - - 4.93 - xsetbv diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s index fd5c0d29d12d4..e11c548f5a7b6 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s +++ 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s @@ -345,14 +345,14 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 46.00 44.00 - - - 46.00 1.00 - - - 2.00 - - +# CHECK-NEXT: 46.00 44.00 - - - 46.00 1.00 - - - - 2.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - - - - - - - - - 1.00 - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - subl %eax, %eax # CHECK-NEXT: - - - - - - 1.00 - - - - - - subq %rax, %rax # CHECK-NEXT: - - - - - 1.00 - - - - - - - xorl %eax, %eax -# CHECK-NEXT: - - - - - - - - - - 1.00 - - xorq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - xorq %rax, %rax # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm2, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index 4f384dcf35c83..6df52b307c2a3 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -636,6 +636,66 @@ vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} vpgatherqd (%rax,%zmm1,2), %ymm2 {k1} +vpmovdb %zmm19, %xmm16 +vpmovdb %zmm19, (%rax) +vpmovdb %zmm19, %xmm16 {k1} +vpmovdb %zmm19, (%rax) {k1} +vpmovdb %zmm19, %xmm16 {k1}{z} + +vpmovdw %zmm19, %ymm16 +vpmovdw %zmm19, (%rax) +vpmovdw %zmm19, %ymm16 {k1} +vpmovdw %zmm19, (%rax) {k1} +vpmovdw %zmm19, %ymm16 {k1}{z} + +vpmovqb %zmm19, %xmm16 +vpmovqb %zmm19, (%rax) +vpmovqb %zmm19, %xmm16 {k1} +vpmovqb %zmm19, (%rax) {k1} +vpmovqb %zmm19, %xmm16 {k1}{z} + +vpmovqd %zmm19, %ymm16 +vpmovqd %zmm19, (%rax) +vpmovqd %zmm19, %ymm16 {k1} +vpmovqd %zmm19, (%rax) {k1} +vpmovqd 
%zmm19, %ymm16 {k1}{z} + +vpmovqw %zmm19, %xmm16 +vpmovqw %zmm19, (%rax) +vpmovqw %zmm19, %xmm16 {k1} +vpmovqw %zmm19, (%rax) {k1} +vpmovqw %zmm19, %xmm16 {k1}{z} + +vpmovsdb %zmm19, %xmm16 +vpmovsdb %zmm19, (%rax) +vpmovsdb %zmm19, %xmm16 {k1} +vpmovsdb %zmm19, (%rax) {k1} +vpmovsdb %zmm19, %xmm16 {k1}{z} + +vpmovsdw %zmm19, %ymm16 +vpmovsdw %zmm19, (%rax) +vpmovsdw %zmm19, %ymm16 {k1} +vpmovsdw %zmm19, (%rax) {k1} +vpmovsdw %zmm19, %ymm16 {k1}{z} + +vpmovsqb %zmm19, %xmm16 +vpmovsqb %zmm19, (%rax) +vpmovsqb %zmm19, %xmm16 {k1} +vpmovsqb %zmm19, (%rax) {k1} +vpmovsqb %zmm19, %xmm16 {k1}{z} + +vpmovsqd %zmm19, %ymm16 +vpmovsqd %zmm19, (%rax) +vpmovsqd %zmm19, %ymm16 {k1} +vpmovsqd %zmm19, (%rax) {k1} +vpmovsqd %zmm19, %ymm16 {k1}{z} + +vpmovsqw %zmm19, %xmm16 +vpmovsqw %zmm19, (%rax) +vpmovsqw %zmm19, %xmm16 {k1} +vpmovsqw %zmm19, (%rax) {k1} +vpmovsqw %zmm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %zmm19 vpmovsxbd (%rax), %zmm19 vpmovsxbd %xmm16, %zmm19 {k1} @@ -671,6 +731,36 @@ vpmovsxwq (%rax), %zmm19 {k1} vpmovsxwq %xmm16, %zmm19 {z}{k1} vpmovsxwq (%rax), %zmm19 {z}{k1} +vpmovusdb %zmm19, %xmm16 +vpmovusdb %zmm19, (%rax) +vpmovusdb %zmm19, %xmm16 {k1} +vpmovusdb %zmm19, (%rax) {k1} +vpmovusdb %zmm19, %xmm16 {k1}{z} + +vpmovusdw %zmm19, %ymm16 +vpmovusdw %zmm19, (%rax) +vpmovusdw %zmm19, %ymm16 {k1} +vpmovusdw %zmm19, (%rax) {k1} +vpmovusdw %zmm19, %ymm16 {k1}{z} + +vpmovusqb %zmm19, %xmm16 +vpmovusqb %zmm19, (%rax) +vpmovusqb %zmm19, %xmm16 {k1} +vpmovusqb %zmm19, (%rax) {k1} +vpmovusqb %zmm19, %xmm16 {k1}{z} + +vpmovusqd %zmm19, %ymm16 +vpmovusqd %zmm19, (%rax) +vpmovusqd %zmm19, %ymm16 {k1} +vpmovusqd %zmm19, (%rax) {k1} +vpmovusqd %zmm19, %ymm16 {k1}{z} + +vpmovusqw %zmm19, %xmm16 +vpmovusqw %zmm19, (%rax) +vpmovusqw %zmm19, %xmm16 {k1} +vpmovusqw %zmm19, (%rax) {k1} +vpmovusqw %zmm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %zmm19 vpmovzxbd (%rax), %zmm19 vpmovzxbd %xmm16, %zmm19 {k1} @@ -1646,6 +1736,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # 
CHECK-NEXT: 5 25 8.00 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: 3 4 1.00 * vpmovqd %zmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 3 4 1.00 * vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 
vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovsxbd %xmm16, %zmm19 {%k1} @@ -1676,6 +1816,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 3 1.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %zmm19, (%rax) {%k1} +# 
CHECK-NEXT: 2 4 2.00 vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovzxbd %xmm16, %zmm19 {%k1} @@ -2055,7 +2220,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 353.00 104.00 360.33 360.33 97.00 652.00 6.00 32.33 +# CHECK-NEXT: - 612.00 353.00 104.00 370.33 370.33 127.00 794.00 6.00 42.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2615,6 +2780,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdw 
%zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vpmovqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %zmm19, %ymm16 {%k1} {z} +# 
CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovsxbd %xmm16, %zmm19 {%k1} @@ -2645,6 +2860,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: - 
- - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbd %xmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s index ff22174f41e93..55581d5995f83 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s @@ -373,16 +373,19 @@ vpmovswb %zmm16, %ymm19 vpmovswb %zmm16, (%rax) vpmovswb %zmm16, %ymm19 {k1} vpmovswb 
%zmm16, (%rax) {k1} +vpmovswb %zmm16, %ymm19 {z}{k1} vpmovuswb %zmm16, %ymm19 vpmovuswb %zmm16, (%rax) vpmovuswb %zmm16, %ymm19 {k1} vpmovuswb %zmm16, (%rax) {k1} +vpmovuswb %zmm16, %ymm19 {z}{k1} vpmovwb %zmm16, %ymm19 vpmovwb %zmm16, (%rax) vpmovwb %zmm16, %ymm19 {k1} vpmovwb %zmm16, (%rax) {k1} +vpmovwb %zmm16, %ymm19 {z}{k1} vpmovzxbw %ymm16, %zmm19 vpmovzxbw (%rax), %zmm19 @@ -919,14 +922,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %zmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: 4 5 2.00 * vpmovuswb %zmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %zmm16, %ymm19 # CHECK-NEXT: 4 5 2.00 * vpmovwb %zmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: 1 3 1.00 vpmovzxbw %ymm16, %zmm19 {%k1} @@ -1126,7 +1132,7 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 239.50 17.50 115.33 115.33 16.00 295.50 0.50 5.33 +# CHECK-NEXT: - - 239.50 17.50 115.33 115.33 16.00 301.50 0.50 5.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1438,14 +1444,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovswb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 
0.33 vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovuswb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovwb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbw %ymm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s index c861bc2300be9..c7ba4556f559b 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s @@ -621,31 +621,37 @@ vpmovswb %xmm16, %xmm19 vpmovswb %xmm16, (%rax) vpmovswb %xmm16, %xmm19 {k1} vpmovswb %xmm16, (%rax) {k1} +vpmovswb %xmm16, %xmm19 {z}{k1} vpmovswb %ymm16, %xmm19 vpmovswb %ymm16, (%rax) vpmovswb %ymm16, %xmm19 {k1} vpmovswb %ymm16, (%rax) {k1} +vpmovswb %ymm16, %xmm19 {z}{k1} vpmovuswb %xmm16, %xmm19 vpmovuswb %xmm16, (%rax) vpmovuswb %xmm16, %xmm19 {k1} vpmovuswb %xmm16, (%rax) {k1} +vpmovuswb %xmm16, %xmm19 {z}{k1} vpmovuswb %ymm16, %xmm19 vpmovuswb %ymm16, (%rax) vpmovuswb %ymm16, %xmm19 {k1} vpmovuswb %ymm16, (%rax) {k1} +vpmovuswb %ymm16, %xmm19 {z}{k1} vpmovwb %xmm16, %xmm19 vpmovwb %xmm16, (%rax) vpmovwb %xmm16, %xmm19 
{k1} vpmovwb %xmm16, (%rax) {k1} +vpmovwb %xmm16, %xmm19 {z}{k1} vpmovwb %ymm16, %xmm19 vpmovwb %ymm16, (%rax) vpmovwb %ymm16, %xmm19 {k1} vpmovwb %ymm16, (%rax) {k1} +vpmovwb %ymm16, %xmm19 {z}{k1} vpmovzxbw %xmm16, %xmm19 vpmovzxbw (%rax), %xmm19 @@ -1620,26 +1626,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %xmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovswb %ymm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovswb %ymm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovuswb %xmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovuswb %ymm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %xmm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovwb %xmm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 4 2.00 vpmovwb %ymm16, %xmm19 # CHECK-NEXT: 4 5 2.00 * vpmovwb %ymm16, (%rax) # CHECK-NEXT: 2 4 2.00 vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 4 5 2.00 * vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: 2 7 1.00 * vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: 1 1 1.00 vpmovzxbw %xmm16, %xmm19 {%k1} @@ 
-2023,7 +2035,7 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 255.33 251.33 222.67 222.67 20.00 503.33 - 6.67 +# CHECK-NEXT: - - 255.33 251.33 222.67 222.67 20.00 515.33 - 6.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2555,26 +2567,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovswb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %ymm16, %xmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovswb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovuswb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovuswb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %xmm16, %xmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovwb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - 
vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %ymm16, %xmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovwb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbw %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s index de4d158e3b60c..f9668c7214bc7 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -1187,6 +1187,126 @@ vpgatherdd (%rax,%xmm1,2), %xmm2 {k1} vpgatherqq (%rax,%xmm1,2), %xmm2 {k1} vpgatherqd (%rax,%xmm1,2), %xmm2 {k1} +vpmovdb %xmm19, %xmm16 +vpmovdb %xmm19, (%rax) +vpmovdb %xmm19, %xmm16 {k1} +vpmovdb %xmm19, (%rax) {k1} +vpmovdb %xmm19, %xmm16 {k1}{z} + +vpmovdb %ymm19, %xmm16 +vpmovdb %ymm19, (%rax) +vpmovdb %ymm19, %xmm16 {k1} +vpmovdb %ymm19, (%rax) {k1} +vpmovdb %ymm19, %xmm16 {k1}{z} + +vpmovdw %xmm19, %xmm16 +vpmovdw %xmm19, (%rax) +vpmovdw %xmm19, %xmm16 {k1} +vpmovdw %xmm19, (%rax) {k1} +vpmovdw %xmm19, %xmm16 {k1}{z} + +vpmovdw %ymm19, %xmm16 +vpmovdw %ymm19, (%rax) +vpmovdw %ymm19, %xmm16 {k1} +vpmovdw %ymm19, (%rax) {k1} +vpmovdw %ymm19, %xmm16 {k1}{z} + +vpmovqb %xmm19, %xmm16 +vpmovqb %xmm19, (%rax) +vpmovqb %xmm19, %xmm16 {k1} +vpmovqb %xmm19, (%rax) {k1} +vpmovqb %xmm19, %xmm16 {k1}{z} + +vpmovqb %ymm19, %xmm16 +vpmovqb %ymm19, (%rax) +vpmovqb %ymm19, %xmm16 {k1} +vpmovqb %ymm19, (%rax) {k1} 
+vpmovqb %ymm19, %xmm16 {k1}{z} + +vpmovqd %xmm19, %xmm16 +vpmovqd %xmm19, (%rax) +vpmovqd %xmm19, %xmm16 {k1} +vpmovqd %xmm19, (%rax) {k1} +vpmovqd %xmm19, %xmm16 {k1}{z} + +vpmovqd %ymm19, %xmm16 +vpmovqd %ymm19, (%rax) +vpmovqd %ymm19, %xmm16 {k1} +vpmovqd %ymm19, (%rax) {k1} +vpmovqd %ymm19, %xmm16 {k1}{z} + +vpmovqw %xmm19, %xmm16 +vpmovqw %xmm19, (%rax) +vpmovqw %xmm19, %xmm16 {k1} +vpmovqw %xmm19, (%rax) {k1} +vpmovqw %xmm19, %xmm16 {k1}{z} + +vpmovqw %ymm19, %xmm16 +vpmovqw %ymm19, (%rax) +vpmovqw %ymm19, %xmm16 {k1} +vpmovqw %ymm19, (%rax) {k1} +vpmovqw %ymm19, %xmm16 {k1}{z} + +vpmovsdb %xmm19, %xmm16 +vpmovsdb %xmm19, (%rax) +vpmovsdb %xmm19, %xmm16 {k1} +vpmovsdb %xmm19, (%rax) {k1} +vpmovsdb %xmm19, %xmm16 {k1}{z} + +vpmovsdb %ymm19, %xmm16 +vpmovsdb %ymm19, (%rax) +vpmovsdb %ymm19, %xmm16 {k1} +vpmovsdb %ymm19, (%rax) {k1} +vpmovsdb %ymm19, %xmm16 {k1}{z} + +vpmovsdw %xmm19, %xmm16 +vpmovsdw %xmm19, (%rax) +vpmovsdw %xmm19, %xmm16 {k1} +vpmovsdw %xmm19, (%rax) {k1} +vpmovsdw %xmm19, %xmm16 {k1}{z} + +vpmovsdw %ymm19, %xmm16 +vpmovsdw %ymm19, (%rax) +vpmovsdw %ymm19, %xmm16 {k1} +vpmovsdw %ymm19, (%rax) {k1} +vpmovsdw %ymm19, %xmm16 {k1}{z} + +vpmovsqb %xmm19, %xmm16 +vpmovsqb %xmm19, (%rax) +vpmovsqb %xmm19, %xmm16 {k1} +vpmovsqb %xmm19, (%rax) {k1} +vpmovsqb %xmm19, %xmm16 {k1}{z} + +vpmovsqb %ymm19, %xmm16 +vpmovsqb %ymm19, (%rax) +vpmovsqb %ymm19, %xmm16 {k1} +vpmovsqb %ymm19, (%rax) {k1} +vpmovsqb %ymm19, %xmm16 {k1}{z} + +vpmovsqd %xmm19, %xmm16 +vpmovsqd %xmm19, (%rax) +vpmovsqd %xmm19, %xmm16 {k1} +vpmovsqd %xmm19, (%rax) {k1} +vpmovsqd %xmm19, %xmm16 {k1}{z} + +vpmovsqd %ymm19, %xmm16 +vpmovsqd %ymm19, (%rax) +vpmovsqd %ymm19, %xmm16 {k1} +vpmovsqd %ymm19, (%rax) {k1} +vpmovsqd %ymm19, %xmm16 {k1}{z} + +vpmovsqw %xmm19, %xmm16 +vpmovsqw %xmm19, (%rax) +vpmovsqw %xmm19, %xmm16 {k1} +vpmovsqw %xmm19, (%rax) {k1} +vpmovsqw %xmm19, %xmm16 {k1}{z} + +vpmovsqw %ymm19, %xmm16 +vpmovsqw %ymm19, (%rax) +vpmovsqw %ymm19, %xmm16 {k1} +vpmovsqw %ymm19, 
(%rax) {k1} +vpmovsqw %ymm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %xmm19 vpmovsxbd (%rax), %xmm19 vpmovsxbd %xmm16, %xmm19 {k1} @@ -1257,6 +1377,66 @@ vpmovsxwq (%rax), %ymm19 {k1} vpmovsxwq %xmm16, %ymm19 {z}{k1} vpmovsxwq (%rax), %ymm19 {z}{k1} +vpmovusdb %xmm19, %xmm16 +vpmovusdb %xmm19, (%rax) +vpmovusdb %xmm19, %xmm16 {k1} +vpmovusdb %xmm19, (%rax) {k1} +vpmovusdb %xmm19, %xmm16 {k1}{z} + +vpmovusdb %ymm19, %xmm16 +vpmovusdb %ymm19, (%rax) +vpmovusdb %ymm19, %xmm16 {k1} +vpmovusdb %ymm19, (%rax) {k1} +vpmovusdb %ymm19, %xmm16 {k1}{z} + +vpmovusdw %xmm19, %xmm16 +vpmovusdw %xmm19, (%rax) +vpmovusdw %xmm19, %xmm16 {k1} +vpmovusdw %xmm19, (%rax) {k1} +vpmovusdw %xmm19, %xmm16 {k1}{z} + +vpmovusdw %ymm19, %xmm16 +vpmovusdw %ymm19, (%rax) +vpmovusdw %ymm19, %xmm16 {k1} +vpmovusdw %ymm19, (%rax) {k1} +vpmovusdw %ymm19, %xmm16 {k1}{z} + +vpmovusqb %xmm19, %xmm16 +vpmovusqb %xmm19, (%rax) +vpmovusqb %xmm19, %xmm16 {k1} +vpmovusqb %xmm19, (%rax) {k1} +vpmovusqb %xmm19, %xmm16 {k1}{z} + +vpmovusqb %ymm19, %xmm16 +vpmovusqb %ymm19, (%rax) +vpmovusqb %ymm19, %xmm16 {k1} +vpmovusqb %ymm19, (%rax) {k1} +vpmovusqb %ymm19, %xmm16 {k1}{z} + +vpmovusqd %xmm19, %xmm16 +vpmovusqd %xmm19, (%rax) +vpmovusqd %xmm19, %xmm16 {k1} +vpmovusqd %xmm19, (%rax) {k1} +vpmovusqd %xmm19, %xmm16 {k1}{z} + +vpmovusqd %ymm19, %xmm16 +vpmovusqd %ymm19, (%rax) +vpmovusqd %ymm19, %xmm16 {k1} +vpmovusqd %ymm19, (%rax) {k1} +vpmovusqd %ymm19, %xmm16 {k1}{z} + +vpmovusqw %xmm19, %xmm16 +vpmovusqw %xmm19, (%rax) +vpmovusqw %xmm19, %xmm16 {k1} +vpmovusqw %xmm19, (%rax) {k1} +vpmovusqw %xmm19, %xmm16 {k1}{z} + +vpmovusqw %ymm19, %xmm16 +vpmovusqw %ymm19, (%rax) +vpmovusqw %ymm19, %xmm16 {k1} +vpmovusqw %ymm19, (%rax) {k1} +vpmovusqw %ymm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %xmm19 vpmovzxbd (%rax), %xmm19 vpmovzxbd %xmm16, %xmm19 {k1} @@ -2784,6 +2964,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 5 19 2.00 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 5 17 1.00 * vpgatherqq 
(%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 5 17 1.00 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovdw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: 3 4 1.00 * vpmovqd %xmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 3 4 1.00 * vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: 3 4 1.00 * 
vpmovqd %ymm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 3 4 1.00 * vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovqw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 
4 2.00 vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: 2 7 1.00 * vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 1.00 vpmovsxbd %xmm16, %xmm19 {%k1} @@ -2844,6 +3124,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: 1 3 1.00 vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 
2.00 vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: 2 4 2.00 vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 2.00 vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %xmm19, %xmm16 +# 
CHECK-NEXT: 4 5 2.00 * vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: 1 3 1.00 vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 4 5 2.00 * vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 3 1.00 vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: 2 7 1.00 * vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: 1 1 1.00 vpmovzxbd %xmm16, %xmm19 {%k1} @@ -3271,7 +3601,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 463.00 359.00 522.83 522.83 88.00 802.00 12.00 29.33 +# CHECK-NEXT: - 423.00 463.00 359.00 542.83 542.83 148.00 1086.00 12.00 49.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -4327,6 +4657,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 
0.33 1.00 2.00 - 0.33 vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vpmovqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vpmovqd 
%ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %xmm19, %xmm16 {%k1} 
{z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %xmm19, %xmm16 {%k1} +# 
CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovsxbd %xmm16, %xmm19 {%k1} @@ -4387,6 +4817,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdw %xmm19, 
(%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 2.00 - - vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 
0.33 vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 2.00 - 0.33 vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - 1.00 - - vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxbd %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s index 5a5cb305c4258..fee636be7ac9d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s @@ -1506,29 +1506,29 @@ vzeroupper # CHECK-NEXT: 1 1 0.25 vpminuw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 1.00 vpmovmskb %xmm0, %ecx -# CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxbq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxbw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxdq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxwd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %xmm2 -# 
CHECK-NEXT: 1 4 2.00 vpmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxwq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxbq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxdq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxwd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 vpmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %xmm2 # CHECK-NEXT: 1 3 0.50 vpmuldq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2 @@ -1749,7 +1749,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 414.08 288.58 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 +# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 396.08 270.58 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2229,29 +2229,29 @@ vzeroupper # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 
0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxdq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - 
- - - vpmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxdq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s 
b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s index c8f07e94ad6b7..2c1af86822d09 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s @@ -636,6 +636,66 @@ vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} vpgatherqd (%rax,%zmm1,2), %ymm2 {k1} +vpmovdb %zmm19, %xmm16 +vpmovdb %zmm19, (%rax) +vpmovdb %zmm19, %xmm16 {k1} +vpmovdb %zmm19, (%rax) {k1} +vpmovdb %zmm19, %xmm16 {k1}{z} + +vpmovdw %zmm19, %ymm16 +vpmovdw %zmm19, (%rax) +vpmovdw %zmm19, %ymm16 {k1} +vpmovdw %zmm19, (%rax) {k1} +vpmovdw %zmm19, %ymm16 {k1}{z} + +vpmovqb %zmm19, %xmm16 +vpmovqb %zmm19, (%rax) +vpmovqb %zmm19, %xmm16 {k1} +vpmovqb %zmm19, (%rax) {k1} +vpmovqb %zmm19, %xmm16 {k1}{z} + +vpmovqd %zmm19, %ymm16 +vpmovqd %zmm19, (%rax) +vpmovqd %zmm19, %ymm16 {k1} +vpmovqd %zmm19, (%rax) {k1} +vpmovqd %zmm19, %ymm16 {k1}{z} + +vpmovqw %zmm19, %xmm16 +vpmovqw %zmm19, (%rax) +vpmovqw %zmm19, %xmm16 {k1} +vpmovqw %zmm19, (%rax) {k1} +vpmovqw %zmm19, %xmm16 {k1}{z} + +vpmovsdb %zmm19, %xmm16 +vpmovsdb %zmm19, (%rax) +vpmovsdb %zmm19, %xmm16 {k1} +vpmovsdb %zmm19, (%rax) {k1} +vpmovsdb %zmm19, %xmm16 {k1}{z} + +vpmovsdw %zmm19, %ymm16 +vpmovsdw %zmm19, (%rax) +vpmovsdw %zmm19, %ymm16 {k1} +vpmovsdw %zmm19, (%rax) {k1} +vpmovsdw %zmm19, %ymm16 {k1}{z} + +vpmovsqb %zmm19, %xmm16 +vpmovsqb %zmm19, (%rax) +vpmovsqb %zmm19, %xmm16 {k1} +vpmovsqb %zmm19, (%rax) {k1} +vpmovsqb %zmm19, %xmm16 {k1}{z} + +vpmovsqd %zmm19, %ymm16 +vpmovsqd %zmm19, (%rax) +vpmovsqd %zmm19, %ymm16 {k1} +vpmovsqd %zmm19, (%rax) {k1} +vpmovsqd %zmm19, %ymm16 {k1}{z} + +vpmovsqw %zmm19, %xmm16 +vpmovsqw %zmm19, (%rax) +vpmovsqw %zmm19, %xmm16 {k1} +vpmovsqw %zmm19, (%rax) {k1} +vpmovsqw %zmm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %zmm19 vpmovsxbd (%rax), %zmm19 vpmovsxbd %xmm16, %zmm19 {k1} @@ -671,6 +731,36 @@ vpmovsxwq (%rax), %zmm19 {k1} vpmovsxwq %xmm16, %zmm19 {z}{k1} vpmovsxwq (%rax), %zmm19 {z}{k1} +vpmovusdb %zmm19, 
%xmm16 +vpmovusdb %zmm19, (%rax) +vpmovusdb %zmm19, %xmm16 {k1} +vpmovusdb %zmm19, (%rax) {k1} +vpmovusdb %zmm19, %xmm16 {k1}{z} + +vpmovusdw %zmm19, %ymm16 +vpmovusdw %zmm19, (%rax) +vpmovusdw %zmm19, %ymm16 {k1} +vpmovusdw %zmm19, (%rax) {k1} +vpmovusdw %zmm19, %ymm16 {k1}{z} + +vpmovusqb %zmm19, %xmm16 +vpmovusqb %zmm19, (%rax) +vpmovusqb %zmm19, %xmm16 {k1} +vpmovusqb %zmm19, (%rax) {k1} +vpmovusqb %zmm19, %xmm16 {k1}{z} + +vpmovusqd %zmm19, %ymm16 +vpmovusqd %zmm19, (%rax) +vpmovusqd %zmm19, %ymm16 {k1} +vpmovusqd %zmm19, (%rax) {k1} +vpmovusqd %zmm19, %ymm16 {k1}{z} + +vpmovusqw %zmm19, %xmm16 +vpmovusqw %zmm19, (%rax) +vpmovusqw %zmm19, %xmm16 {k1} +vpmovusqw %zmm19, (%rax) {k1} +vpmovusqw %zmm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %zmm19 vpmovzxbd (%rax), %zmm19 vpmovzxbd %xmm16, %zmm19 {k1} @@ -1646,6 +1736,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 5 0.33 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovdb %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: 1 11 1.50 * vpmovdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqb %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovqd %zmm19, 
%ymm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqw %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm16, %zmm19 {%k1} @@ -1676,6 +1816,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), 
%zmm19 {%k1} # CHECK-NEXT: 1 4 2.00 vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: 1 5 2.50 vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: 1 5 2.50 vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: 1 5 2.50 vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm16, %zmm19 {%k1} @@ -2068,7 +2233,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 219.50 1059.00 616.50 351.00 297.50 297.50 17.00 205.67 
205.67 205.67 194.67 194.67 194.67 16.50 16.50 +# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 219.50 1198.50 756.00 351.00 312.50 312.50 17.00 215.67 215.67 215.67 204.67 204.67 204.67 16.50 16.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2628,6 +2793,56 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 
1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovqw %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 
1.50 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 
2.50 - - - - - - - - - - - - vpmovsqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} @@ -2658,6 +2873,31 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusdb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusdw %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 
0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqb %zmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusqd %zmm19, %ymm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %zmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %zmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 2.50 2.50 - - - - - - - - - - - - vpmovusqw %zmm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s index 4585ed0f2ed74..64ea2597e1213 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s @@ -373,16 +373,19 @@ vpmovswb %zmm16, %ymm19 vpmovswb %zmm16, (%rax) vpmovswb %zmm16, %ymm19 {k1} vpmovswb %zmm16, (%rax) {k1} +vpmovswb %zmm16, %ymm19 {z}{k1} vpmovuswb %zmm16, %ymm19 vpmovuswb %zmm16, (%rax) vpmovuswb %zmm16, %ymm19 {k1} vpmovuswb %zmm16, (%rax) {k1} +vpmovuswb %zmm16, %ymm19 {z}{k1} vpmovwb %zmm16, %ymm19 vpmovwb %zmm16, (%rax) vpmovwb %zmm16, %ymm19 {k1} vpmovwb %zmm16, (%rax) {k1} +vpmovwb %zmm16, %ymm19 {z}{k1} vpmovzxbw %ymm16, %zmm19 vpmovzxbw (%rax), %zmm19 @@ -919,14 +922,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 11 1.50 * vpmovswb %zmm16, (%rax) # CHECK-NEXT: 2 4 1.50 vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 1.50 vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: 1 11 1.50 * vpmovuswb %zmm16, (%rax) # CHECK-NEXT: 2 4 1.50 vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 4 1.50 vpmovwb %zmm16, %ymm19 # CHECK-NEXT: 1 11 1.50 * vpmovwb %zmm16, (%rax) # CHECK-NEXT: 2 4 1.50 vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 2.00 vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: 1 11 1.50 * vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: 1 4 2.00 vpmovzxbw %ymm16, %zmm19 {%k1} @@ -1139,7 +1145,7 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 0.67 0.67 0.67 2.00 2.00 2.00 2.00 - 177.50 309.00 274.50 161.00 123.00 123.00 6.00 77.33 77.33 77.33 75.33 75.33 75.33 3.00 3.00 +# CHECK-NEXT: 0.67 0.67 0.67 2.00 2.00 2.00 2.00 - 177.50 313.50 279.00 161.00 123.00 123.00 6.00 
77.33 77.33 77.33 75.33 75.33 75.33 3.00 3.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -1451,14 +1457,17 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovuswb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %zmm16, (%rax) # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %zmm19 # 
CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s index 8a62f803fce07..a298dd69ee9b3 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bwvl.s @@ -621,31 +621,37 @@ vpmovswb %xmm16, %xmm19 vpmovswb %xmm16, (%rax) vpmovswb %xmm16, %xmm19 {k1} vpmovswb %xmm16, (%rax) {k1} +vpmovswb %xmm16, %xmm19 {z}{k1} vpmovswb %ymm16, %xmm19 vpmovswb %ymm16, (%rax) vpmovswb %ymm16, %xmm19 {k1} vpmovswb %ymm16, (%rax) {k1} +vpmovswb %ymm16, %xmm19 {z}{k1} vpmovuswb %xmm16, %xmm19 vpmovuswb %xmm16, (%rax) vpmovuswb %xmm16, %xmm19 {k1} vpmovuswb %xmm16, (%rax) {k1} +vpmovuswb %xmm16, %xmm19 {z}{k1} vpmovuswb %ymm16, %xmm19 vpmovuswb %ymm16, (%rax) vpmovuswb %ymm16, %xmm19 {k1} vpmovuswb %ymm16, (%rax) {k1} +vpmovuswb %ymm16, %xmm19 {z}{k1} vpmovwb %xmm16, %xmm19 vpmovwb %xmm16, (%rax) vpmovwb %xmm16, %xmm19 {k1} vpmovwb %xmm16, (%rax) {k1} +vpmovwb %xmm16, %xmm19 {z}{k1} vpmovwb %ymm16, %xmm19 vpmovwb %ymm16, (%rax) vpmovwb %ymm16, %xmm19 {k1} vpmovwb %ymm16, (%rax) {k1} +vpmovwb %ymm16, %xmm19 {z}{k1} vpmovzxbw %xmm16, %xmm19 vpmovzxbw (%rax), %xmm19 @@ -1620,26 +1626,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 11 1.50 * vpmovswb %xmm16, (%rax) # CHECK-NEXT: 1 2 0.50 vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovswb %ymm16, %xmm19 # CHECK-NEXT: 1 11 1.50 * vpmovswb %ymm16, (%rax) # CHECK-NEXT: 1 2 0.50 vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: 1 11 1.50 * vpmovuswb %xmm16, (%rax) # CHECK-NEXT: 1 2 0.50 vpmovuswb %xmm16, %xmm19 
{%k1} # CHECK-NEXT: 1 11 1.50 * vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: 1 11 1.50 * vpmovuswb %ymm16, (%rax) # CHECK-NEXT: 1 2 0.50 vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovwb %xmm16, %xmm19 # CHECK-NEXT: 1 11 1.50 * vpmovwb %xmm16, (%rax) # CHECK-NEXT: 1 2 0.50 vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovwb %ymm16, %xmm19 # CHECK-NEXT: 1 11 1.50 * vpmovwb %ymm16, (%rax) # CHECK-NEXT: 1 2 0.50 vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: 1 11 1.50 * vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: 1 2 0.50 vpmovzxbw %xmm16, %xmm19 {%k1} @@ -2036,7 +2048,7 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - 4.00 4.00 4.00 4.00 - 233.00 408.50 297.50 140.00 226.00 226.00 8.00 150.67 150.67 150.67 148.00 148.00 148.00 4.00 4.00 +# CHECK-NEXT: - - - 4.00 4.00 4.00 4.00 - 233.00 411.50 300.50 140.00 226.00 226.00 8.00 150.67 150.67 150.67 148.00 148.00 148.00 4.00 4.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2568,26 +2580,32 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - - - - - - - - - - - - vpmovswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovswb %ymm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovuswb %xmm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovuswb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovuswb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovuswb %ymm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovuswb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovuswb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovuswb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovwb %xmm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 
0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %xmm16, (%rax) # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovwb %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovwb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovwb %ymm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %ymm16, (%rax) # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovwb %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovwb %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbw %xmm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbw %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s index 5565eb740a1c6..ce8b986b0f8b5 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s @@ -1187,6 +1187,126 @@ vpgatherdd (%rax,%xmm1,2), %xmm2 {k1} vpgatherqq (%rax,%xmm1,2), %xmm2 {k1} vpgatherqd (%rax,%xmm1,2), %xmm2 {k1} +vpmovdb %xmm19, %xmm16 +vpmovdb %xmm19, (%rax) +vpmovdb %xmm19, %xmm16 {k1} +vpmovdb %xmm19, (%rax) {k1} +vpmovdb %xmm19, %xmm16 {k1}{z} + +vpmovdb %ymm19, %xmm16 +vpmovdb %ymm19, (%rax) +vpmovdb %ymm19, %xmm16 {k1} +vpmovdb %ymm19, (%rax) {k1} +vpmovdb %ymm19, %xmm16 {k1}{z} + +vpmovdw %xmm19, %xmm16 +vpmovdw %xmm19, (%rax) +vpmovdw %xmm19, %xmm16 {k1} +vpmovdw 
%xmm19, (%rax) {k1} +vpmovdw %xmm19, %xmm16 {k1}{z} + +vpmovdw %ymm19, %xmm16 +vpmovdw %ymm19, (%rax) +vpmovdw %ymm19, %xmm16 {k1} +vpmovdw %ymm19, (%rax) {k1} +vpmovdw %ymm19, %xmm16 {k1}{z} + +vpmovqb %xmm19, %xmm16 +vpmovqb %xmm19, (%rax) +vpmovqb %xmm19, %xmm16 {k1} +vpmovqb %xmm19, (%rax) {k1} +vpmovqb %xmm19, %xmm16 {k1}{z} + +vpmovqb %ymm19, %xmm16 +vpmovqb %ymm19, (%rax) +vpmovqb %ymm19, %xmm16 {k1} +vpmovqb %ymm19, (%rax) {k1} +vpmovqb %ymm19, %xmm16 {k1}{z} + +vpmovqd %xmm19, %xmm16 +vpmovqd %xmm19, (%rax) +vpmovqd %xmm19, %xmm16 {k1} +vpmovqd %xmm19, (%rax) {k1} +vpmovqd %xmm19, %xmm16 {k1}{z} + +vpmovqd %ymm19, %xmm16 +vpmovqd %ymm19, (%rax) +vpmovqd %ymm19, %xmm16 {k1} +vpmovqd %ymm19, (%rax) {k1} +vpmovqd %ymm19, %xmm16 {k1}{z} + +vpmovqw %xmm19, %xmm16 +vpmovqw %xmm19, (%rax) +vpmovqw %xmm19, %xmm16 {k1} +vpmovqw %xmm19, (%rax) {k1} +vpmovqw %xmm19, %xmm16 {k1}{z} + +vpmovqw %ymm19, %xmm16 +vpmovqw %ymm19, (%rax) +vpmovqw %ymm19, %xmm16 {k1} +vpmovqw %ymm19, (%rax) {k1} +vpmovqw %ymm19, %xmm16 {k1}{z} + +vpmovsdb %xmm19, %xmm16 +vpmovsdb %xmm19, (%rax) +vpmovsdb %xmm19, %xmm16 {k1} +vpmovsdb %xmm19, (%rax) {k1} +vpmovsdb %xmm19, %xmm16 {k1}{z} + +vpmovsdb %ymm19, %xmm16 +vpmovsdb %ymm19, (%rax) +vpmovsdb %ymm19, %xmm16 {k1} +vpmovsdb %ymm19, (%rax) {k1} +vpmovsdb %ymm19, %xmm16 {k1}{z} + +vpmovsdw %xmm19, %xmm16 +vpmovsdw %xmm19, (%rax) +vpmovsdw %xmm19, %xmm16 {k1} +vpmovsdw %xmm19, (%rax) {k1} +vpmovsdw %xmm19, %xmm16 {k1}{z} + +vpmovsdw %ymm19, %xmm16 +vpmovsdw %ymm19, (%rax) +vpmovsdw %ymm19, %xmm16 {k1} +vpmovsdw %ymm19, (%rax) {k1} +vpmovsdw %ymm19, %xmm16 {k1}{z} + +vpmovsqb %xmm19, %xmm16 +vpmovsqb %xmm19, (%rax) +vpmovsqb %xmm19, %xmm16 {k1} +vpmovsqb %xmm19, (%rax) {k1} +vpmovsqb %xmm19, %xmm16 {k1}{z} + +vpmovsqb %ymm19, %xmm16 +vpmovsqb %ymm19, (%rax) +vpmovsqb %ymm19, %xmm16 {k1} +vpmovsqb %ymm19, (%rax) {k1} +vpmovsqb %ymm19, %xmm16 {k1}{z} + +vpmovsqd %xmm19, %xmm16 +vpmovsqd %xmm19, (%rax) +vpmovsqd %xmm19, %xmm16 {k1} +vpmovsqd 
%xmm19, (%rax) {k1} +vpmovsqd %xmm19, %xmm16 {k1}{z} + +vpmovsqd %ymm19, %xmm16 +vpmovsqd %ymm19, (%rax) +vpmovsqd %ymm19, %xmm16 {k1} +vpmovsqd %ymm19, (%rax) {k1} +vpmovsqd %ymm19, %xmm16 {k1}{z} + +vpmovsqw %xmm19, %xmm16 +vpmovsqw %xmm19, (%rax) +vpmovsqw %xmm19, %xmm16 {k1} +vpmovsqw %xmm19, (%rax) {k1} +vpmovsqw %xmm19, %xmm16 {k1}{z} + +vpmovsqw %ymm19, %xmm16 +vpmovsqw %ymm19, (%rax) +vpmovsqw %ymm19, %xmm16 {k1} +vpmovsqw %ymm19, (%rax) {k1} +vpmovsqw %ymm19, %xmm16 {k1}{z} + vpmovsxbd %xmm16, %xmm19 vpmovsxbd (%rax), %xmm19 vpmovsxbd %xmm16, %xmm19 {k1} @@ -1257,6 +1377,66 @@ vpmovsxwq (%rax), %ymm19 {k1} vpmovsxwq %xmm16, %ymm19 {z}{k1} vpmovsxwq (%rax), %ymm19 {z}{k1} +vpmovusdb %xmm19, %xmm16 +vpmovusdb %xmm19, (%rax) +vpmovusdb %xmm19, %xmm16 {k1} +vpmovusdb %xmm19, (%rax) {k1} +vpmovusdb %xmm19, %xmm16 {k1}{z} + +vpmovusdb %ymm19, %xmm16 +vpmovusdb %ymm19, (%rax) +vpmovusdb %ymm19, %xmm16 {k1} +vpmovusdb %ymm19, (%rax) {k1} +vpmovusdb %ymm19, %xmm16 {k1}{z} + +vpmovusdw %xmm19, %xmm16 +vpmovusdw %xmm19, (%rax) +vpmovusdw %xmm19, %xmm16 {k1} +vpmovusdw %xmm19, (%rax) {k1} +vpmovusdw %xmm19, %xmm16 {k1}{z} + +vpmovusdw %ymm19, %xmm16 +vpmovusdw %ymm19, (%rax) +vpmovusdw %ymm19, %xmm16 {k1} +vpmovusdw %ymm19, (%rax) {k1} +vpmovusdw %ymm19, %xmm16 {k1}{z} + +vpmovusqb %xmm19, %xmm16 +vpmovusqb %xmm19, (%rax) +vpmovusqb %xmm19, %xmm16 {k1} +vpmovusqb %xmm19, (%rax) {k1} +vpmovusqb %xmm19, %xmm16 {k1}{z} + +vpmovusqb %ymm19, %xmm16 +vpmovusqb %ymm19, (%rax) +vpmovusqb %ymm19, %xmm16 {k1} +vpmovusqb %ymm19, (%rax) {k1} +vpmovusqb %ymm19, %xmm16 {k1}{z} + +vpmovusqd %xmm19, %xmm16 +vpmovusqd %xmm19, (%rax) +vpmovusqd %xmm19, %xmm16 {k1} +vpmovusqd %xmm19, (%rax) {k1} +vpmovusqd %xmm19, %xmm16 {k1}{z} + +vpmovusqd %ymm19, %xmm16 +vpmovusqd %ymm19, (%rax) +vpmovusqd %ymm19, %xmm16 {k1} +vpmovusqd %ymm19, (%rax) {k1} +vpmovusqd %ymm19, %xmm16 {k1}{z} + +vpmovusqw %xmm19, %xmm16 +vpmovusqw %xmm19, (%rax) +vpmovusqw %xmm19, %xmm16 {k1} +vpmovusqw %xmm19, (%rax) 
{k1} +vpmovusqw %xmm19, %xmm16 {k1}{z} + +vpmovusqw %ymm19, %xmm16 +vpmovusqw %ymm19, (%rax) +vpmovusqw %ymm19, %xmm16 {k1} +vpmovusqw %ymm19, (%rax) {k1} +vpmovusqw %ymm19, %xmm16 {k1}{z} + vpmovzxbd %xmm16, %xmm19 vpmovzxbd (%rax), %xmm19 vpmovzxbd %xmm16, %xmm19 {k1} @@ -2784,6 +2964,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 5 0.33 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovdb %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovdb %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovdw %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovdw %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqb %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqb %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovqb %ymm19, %xmm16 {%k1} 
+# CHECK-NEXT: 1 11 1.50 * vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqd %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqd %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqw %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovqw %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 
vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: 1 2 0.50 vpmovsxbd %xmm16, %xmm19 
{%k1} @@ -2844,6 +3124,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: 1 2 0.50 vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqd %xmm19, (%rax) +# 
CHECK-NEXT: 1 2 0.50 vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 1 2 0.50 vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: 1 2 0.50 vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: 1 2 0.50 vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: 2 4 1.50 vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: 1 11 1.50 * vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: 2 4 1.50 vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: 1 11 1.50 * vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: 2 4 1.50 vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: 1 2 0.50 vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: 1 2 0.50 vpmovzxbd %xmm16, %xmm19 {%k1} @@ -3284,7 +3614,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 948.00 501.50 261.50 479.50 479.50 32.00 335.67 335.67 335.67 314.33 314.33 314.33 32.00 32.00 +# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 1086.00 639.50 261.50 509.50 509.50 32.00 355.67 355.67 355.67 334.33 334.33 334.33 32.00 32.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -4340,6 +4670,106 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - 
- 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - 
- - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - - - - - - - - - - - - vpmovqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovqw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - 
- - - - - - - - - vpmovsdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 
0.33 0.33 0.33 0.33 - - vpmovsqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - 
- - vpmovsqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbd %xmm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbd %xmm16, %xmm19 {%k1} @@ -4400,6 +4830,56 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwq %xmm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - 
vpmovusdb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusdw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusdw %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqb %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqb %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 
0.33 0.33 0.33 0.33 - - vpmovusqb %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqb %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqb %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqb %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqb %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqb %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqd %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqd %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqd %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqd %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqd %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqd %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqd %ymm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqw %xmm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %xmm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 
- - - - - - - - - - - vpmovusqw %xmm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %xmm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovusqw %xmm19, %xmm16 {%k1} {z} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusqw %ymm19, %xmm16 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %ymm19, (%rax) +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusqw %ymm19, %xmm16 {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovusqw %ymm19, (%rax) {%k1} +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovusqw %ymm19, %xmm16 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbd %xmm16, %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %xmm19 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbd %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s index 5521c1eae6dfd..f31eb7c2e6d6f 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse41.s @@ -213,29 +213,29 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * pminud (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 pminuw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pminuw (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovsxbd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovsxbq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovsxbw (%rax), %xmm2 
-# CHECK-NEXT: 1 4 2.00 pmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovsxdq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovsxwd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovsxwq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovzxbd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovzxbq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovzxbw (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovzxdq (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovzxwd (%rax), %xmm2 -# CHECK-NEXT: 1 4 2.00 pmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * pmovzxwq (%rax), %xmm2 # CHECK-NEXT: 1 3 0.50 pmuldq %xmm0, %xmm2 # CHECK-NEXT: 1 10 0.50 * pmuldq (%rax), %xmm2 @@ -279,7 +279,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 31.00 61.50 46.50 16.00 35.50 35.50 7.00 16.33 16.33 16.33 14.67 14.67 14.67 2.50 2.50 +# CHECK-NEXT: - - - - - - - - 31.00 43.50 28.50 16.00 35.50 35.50 7.00 16.33 16.33 16.33 14.67 14.67 14.67 2.50 2.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] 
Instructions: @@ -341,29 +341,29 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminud (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminuw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminuw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxbd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxbd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxbq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxbq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxbw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxbw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxdq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxdq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxwd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxwd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - 
pmovsxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxwq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxbd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxbd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxbq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxbq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxbw %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxbw (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxdq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxdq (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxwd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxwd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - pmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxwq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmuldq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - 
0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmuldq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-reduce/distinct-dimetadata-nullptr.ll b/llvm/test/tools/llvm-reduce/distinct-dimetadata-nullptr.ll new file mode 100644 index 0000000000000..5861adc65b83c --- /dev/null +++ b/llvm/test/tools/llvm-reduce/distinct-dimetadata-nullptr.ll @@ -0,0 +1,17 @@ +; Test checking that distinct metadata reduction pass handles null pointers properly. +; This test will lead to a crash if nullptrs inside distinct metadata are not handled correctly, in this case inside DICompileUnit + +; RUN: llvm-reduce --delta-passes=distinct-metadata --aggressive-named-md-reduction --test FileCheck --test-arg %s --test-arg --input-file %s -o %t +; CHECK: {{.*}}distinct !DICompileUnit{{.*}} + + +!llvm.module.flags = !{!0, !1, !6} +!llvm.dbg.cu = !{!4} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Source Lang Literal", !2} +!2 = !{!3} +!3 = !{!4, i32 33} +!4 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !5, producer: "foobar", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!5 = !DIFile(filename: "main.cpp", directory: "foodir") +!6 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp index 02129263f6af4..0f46409977a33 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp @@ -39,7 +39,7 @@ reduceNodes(MDNode *Root, // Mark the nodes for removal for (unsigned int I = 0; I < CurrentNode->getNumOperands(); ++I) { if (MDNode *Operand = - dyn_cast(CurrentNode->getOperand(I).get())) { + dyn_cast_or_null(CurrentNode->getOperand(I).get())) { // Check whether node has been visited if (VisitedNodes.insert(Operand)) NodesToTraverse.push(Operand); @@ -71,7 +71,7 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, for (auto I = 
NamedNode.op_begin(); I != NamedNode.op_end(); ++I) { // If the node hasn't been traversed yet, add it to the queue of nodes to // traverse. - if (MDTuple *TupleI = dyn_cast((*I))) { + if (MDTuple *TupleI = dyn_cast_or_null((*I))) { if (VisitedNodes.insert(TupleI)) NodesToTraverse.push(TupleI); } @@ -108,7 +108,8 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, // Push the remaining nodes into the queue for (unsigned int I = 0; I < CurrentTuple->getNumOperands(); ++I) { - MDTuple *Operand = dyn_cast(CurrentTuple->getOperand(I).get()); + MDTuple *Operand = + dyn_cast_or_null(CurrentTuple->getOperand(I).get()); if (Operand && VisitedNodes.insert(Operand)) // If the node hasn't been traversed yet, add it to the queue of nodes // to traverse. @@ -127,7 +128,7 @@ static void extractDistinctMetadataFromModule(Oracle &O, Program.named_metadata()) { // Iterate over the named nodes for (unsigned int I = 0; I < NamedNode.getNumOperands(); ++I) { // Iterate over first level unnamed nodes.. 
- if (MDTuple *Operand = dyn_cast(NamedNode.getOperand(I))) + if (MDTuple *Operand = dyn_cast_or_null(NamedNode.getOperand(I))) reduceNodes(Operand, NodesToDelete, TemporaryTuple, O, Program); } } diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index b0077d5b54a3e..c9bc58f45f08c 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -75,6 +75,7 @@ add_llvm_unittest(ADTTests SmallPtrSetTest.cpp SmallSetTest.cpp SmallStringTest.cpp + SmallVectorExtrasTest.cpp SmallVectorTest.cpp SparseBitVectorTest.cpp SparseMultiSetTest.cpp diff --git a/llvm/unittests/ADT/SmallVectorExtrasTest.cpp b/llvm/unittests/ADT/SmallVectorExtrasTest.cpp new file mode 100644 index 0000000000000..467eb13ac390b --- /dev/null +++ b/llvm/unittests/ADT/SmallVectorExtrasTest.cpp @@ -0,0 +1,37 @@ +//===- llvm/unittest/ADT/SmallVectorExtrasTest.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// SmallVectorExtras unit tests. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVectorExtras.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include +#include + +using testing::ElementsAre; + +namespace llvm { +namespace { + +TEST(SmallVectorExtrasTest, FilterToVector) { + std::vector Numbers = {0, 1, 2, 3, 4}; + auto Odd = filter_to_vector<2>(Numbers, [](int X) { return (X % 2) != 0; }); + static_assert(std::is_same_v>); + EXPECT_THAT(Odd, ElementsAre(1, 3)); + + auto Even = filter_to_vector(Numbers, [](int X) { return (X % 2) == 0; }); + static_assert(std::is_same_v>); + EXPECT_THAT(Even, ElementsAre(0, 2, 4)); +} + +} // end namespace +} // namespace llvm diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp index 485ec720ffad6..f566bee170236 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp @@ -9,6 +9,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h" #include "llvm/ObjectYAML/DWARFEmitter.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" @@ -704,4 +705,138 @@ TEST(DWARFDie, getNameFromTypeUnit) { ASSERT_STREQ(Die.getName(DINameKind::ShortName), "STRUCT"); } +void testAppendAndTerminateTemplateParameters(const DWARFDie &DIE, + const std::string &Expected) { + std::string TemplateName; + llvm::raw_string_ostream TemplateNameOS(TemplateName); + llvm::DWARFTypePrinter TemplateNamePrinter(TemplateNameOS); + TemplateNamePrinter.appendAndTerminateTemplateParameters(DIE); + EXPECT_THAT(TemplateName, Expected); +} + +void testAppendQualifiedName(const DWARFDie &DIE, const std::string &Expected) { + std::string QualifiedName; + llvm::raw_string_ostream TemplateNameOS(QualifiedName); + llvm::DWARFTypePrinter TemplateNamePrinter(TemplateNameOS); + 
TemplateNamePrinter.appendQualifiedName(DIE); + EXPECT_THAT(QualifiedName, Expected); +} + +TEST(DWARFDie, DWARFTypePrinterTest) { + // Make sure we can get template parameters and qualified names correctly with + // DWARFTypePrinter when using -gsimple-template-names. + + // 0x0000000b: DW_TAG_compile_unit + // 0x0000000c: DW_TAG_base_type + // DW_AT_name ("int") + // 0x00000011: DW_TAG_structure_type + // DW_AT_name ("t1") + // 0x00000015: DW_TAG_template_type_parameter + // DW_AT_type (0x0000001f "t3") + // 0x0000001a: DW_TAG_structure_type + // DW_AT_name ("t2") + // 0x0000001e: NULL + // 0x0000001f: DW_TAG_structure_type + // DW_AT_name ("t3") + // 0x00000023: DW_TAG_template_type_parameter + // DW_AT_type (0x0000000c "int") + // 0x00000028: NULL + // 0x00000029: NULL + const char *yamldata = R"( + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + - Code: 0x2 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x3 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x4 + Tag: DW_TAG_template_type_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0x5 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x6 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x7 + Tag: DW_TAG_template_type_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0x8 + Tag: DW_TAG_typedef + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_name + Form: DW_FORM_string + debug_info: + - Version: 4 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + - AbbrCode: 0x2 + Values: 
+ - Value: 0xDEADBEEFDEADBEEF + CStr: int + - AbbrCode: 0x3 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: t1 + - AbbrCode: 0x4 + Values: + - Value: 0x0000001f # update + - AbbrCode: 0x5 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: t2 + - AbbrCode: 0x0 + - AbbrCode: 0x6 + Values: + - Value: 0xDEADBEEFDEADBEEF + CStr: t3 + - AbbrCode: 0x7 + Values: + - Value: 0x0000000c # update + - AbbrCode: 0x8 + Values: + - Value: 0x0000000c + - CStr: my_int + - AbbrCode: 0x0 + - AbbrCode: 0x0)"; + Expected>> Sections = + DWARFYAML::emitDebugSections(StringRef(yamldata), + /*IsLittleEndian=*/true, + /*Is64BitAddrSize=*/true); + ASSERT_THAT_EXPECTED(Sections, Succeeded()); + std::unique_ptr Ctx = + DWARFContext::create(*Sections, 4, /*isLittleEndian=*/true); + testAppendAndTerminateTemplateParameters(Ctx->getDIEForOffset(0x11), + " >"); + testAppendQualifiedName(Ctx->getDIEForOffset(0x1a), "t1 >::t2"); + testAppendQualifiedName(Ctx->getDIEForOffset(0x28), "t3::my_int"); +} } // end anonymous namespace diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index c3f35e41b5fc7..7b9910e295df9 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -628,7 +628,7 @@ TEST(MemProf, RadixTreeBuilderEmpty) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty()); const auto Mappings = Builder.takeCallStackPos(); @@ -646,7 +646,7 @@ TEST(MemProf, RadixTreeBuilderOne) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + 
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ 3U, // Size of CS1, @@ -673,7 +673,7 @@ TEST(MemProf, RadixTreeBuilderTwo) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ @@ -711,7 +711,7 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 3e0be7027eeff..c2aa571b547c6 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -2491,7 +2491,6 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info, raw_ostream &OS) { if (!CI.isUserClass()) continue; - OS << " // '" << CI.ClassName << "' class\n"; OS << " case " << CI.Name << ": {\n"; OS << " DiagnosticPredicate DP(Operand." 
<< CI.PredicateMethod << "());\n"; diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index f05d8b89e73b9..7d4fb7d8e1504 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -222,6 +222,11 @@ class string: flags=(re.M | re.S), ) +ASM_FUNCTION_XTENSA_RE = re.compile( + r"^(?P[^:]+): +# @(?P=func)\n(?P.*?)\n\.Lfunc_end\d+:\n", + flags=(re.M | re.S), +) + ASM_FUNCTION_CSKY_RE = re.compile( r"^_?(?P[^:]+):[ \t]*#+[ \t]*@(?P=func)\n(?:\s*\.?Lfunc_begin[^:\n]*:\n)?[^:]*?" r"(?P^##?[ \t]+[^:]+:.*?)\s*" @@ -492,6 +497,17 @@ def scrub_asm_ve(asm, args): return asm +def scrub_asm_xtensa(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = common.SCRUB_WHITESPACE_RE.sub(r" ", asm) + # Expand the tabs used for indentation. + asm = string.expandtabs(asm, 2) + # Strip trailing whitespace. + asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r"", asm) + return asm + + def scrub_asm_csky(asm, args): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. 
@@ -576,6 +592,7 @@ def get_run_handler(triple): "wasm32": (scrub_asm_wasm, ASM_FUNCTION_WASM_RE), "wasm64": (scrub_asm_wasm, ASM_FUNCTION_WASM_RE), "ve": (scrub_asm_ve, ASM_FUNCTION_VE_RE), + "xtensa": (scrub_asm_xtensa, ASM_FUNCTION_XTENSA_RE), "csky": (scrub_asm_csky, ASM_FUNCTION_CSKY_RE), "nvptx": (scrub_asm_nvptx, ASM_FUNCTION_NVPTX_RE), "loongarch32": (scrub_asm_loongarch, ASM_FUNCTION_LOONGARCH_RE), diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn index 51dc24481a513..945d31afca10f 100644 --- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn @@ -15,6 +15,7 @@ unittest("CoreTests") { "BinaryContext.cpp", "DynoStats.cpp", "MCPlusBuilder.cpp", + "MemoryMaps.cpp", ] defines = [] diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake index c07c55b1e17ad..d3a98aaf6ffd1 100644 --- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake +++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake @@ -25,7 +25,7 @@ macro(mlir_configure_python_dev_packages) message(STATUS "Found python libraries: ${Python3_LIBRARIES}") message(STATUS "Found numpy v${Python3_NumPy_VERSION}: ${Python3_NumPy_INCLUDE_DIRS}") mlir_detect_pybind11_install() - find_package(pybind11 2.9 CONFIG REQUIRED) + find_package(pybind11 2.10 CONFIG REQUIRED) message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIR}") message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " "suffix = '${PYTHON_MODULE_SUFFIX}', " diff --git a/mlir/docs/PatternRewriter.md b/mlir/docs/PatternRewriter.md index c61ceaf81681e..2f1483db8190a 100644 --- a/mlir/docs/PatternRewriter.md +++ b/mlir/docs/PatternRewriter.md @@ -73,7 +73,7 @@ public: // otherwise. // ... 
} - void rewrite(Operation *op, PatternRewriter &rewriter) { + void rewrite(Operation *op, PatternRewriter &rewriter) const override { // The `rewrite` method performs mutations on the IR rooted at `op` using // the provided rewriter. All mutations must go through the provided // rewriter. @@ -81,7 +81,7 @@ public: /// In this section, the `match` and `rewrite` implementation is specified /// using a single hook. - LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) { + LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { // The `matchAndRewrite` method performs both the matching and the mutation. // Note that the match must reach a successful point before IR mutation may // take place. diff --git a/mlir/include/mlir-c/Pass.h b/mlir/include/mlir-c/Pass.h index 435db55a86783..fa1c7df15e835 100644 --- a/mlir/include/mlir-c/Pass.h +++ b/mlir/include/mlir-c/Pass.h @@ -75,10 +75,13 @@ MLIR_CAPI_EXPORTED MlirLogicalResult mlirPassManagerRunOnOp(MlirPassManager passManager, MlirOperation op); /// Enable IR printing. +/// The treePrintingPath argument is an optional path to a directory +/// where the dumps will be produced. If it isn't provided then dumps +/// are produced to stderr. MLIR_CAPI_EXPORTED void mlirPassManagerEnableIRPrinting( MlirPassManager passManager, bool printBeforeAll, bool printAfterAll, bool printModuleScope, bool printAfterOnlyOnChange, - bool printAfterOnlyOnFailure); + bool printAfterOnlyOnFailure, MlirStringRef treePrintingPath); /// Enable lir-reproducer-before-all. 
MLIR_CAPI_EXPORTED void diff --git a/mlir/include/mlir/Conversion/CMakeLists.txt b/mlir/include/mlir/Conversion/CMakeLists.txt index d212bf3e395e7..9f76ab659215e 100644 --- a/mlir/include/mlir/Conversion/CMakeLists.txt +++ b/mlir/include/mlir/Conversion/CMakeLists.txt @@ -6,3 +6,5 @@ mlir_tablegen(Passes.capi.cpp.inc -gen-pass-capi-impl --prefix Conversion) add_public_tablegen_target(MLIRConversionPassIncGen) add_mlir_doc(Passes ConversionPasses ./ -gen-pass-doc) + +add_subdirectory(ConvertToLLVM) diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt b/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt new file mode 100644 index 0000000000000..54d7a03fc22df --- /dev/null +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS ToLLVMInterface.td) +mlir_tablegen(ToLLVMAttrInterface.h.inc -gen-attr-interface-decls) +mlir_tablegen(ToLLVMAttrInterface.cpp.inc -gen-attr-interface-defs) +mlir_tablegen(ToLLVMOpInterface.h.inc -gen-op-interface-decls) +mlir_tablegen(ToLLVMOpInterface.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIRConvertToLLVMInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRConvertToLLVMInterfaceIncGen) diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h index 00aeed9bf29dc..6fd043646acd3 100644 --- a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h @@ -11,6 +11,7 @@ #include "mlir/IR/DialectInterface.h" #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" namespace mlir { class ConversionTarget; @@ -18,6 +19,7 @@ class LLVMTypeConverter; class MLIRContext; class Operation; class RewritePatternSet; +class AnalysisManager; /// Base class for dialect interfaces providing translation to LLVM IR. 
/// Dialects that can be translated should provide an implementation of this @@ -50,6 +52,18 @@ void populateConversionTargetFromOperation(Operation *op, LLVMTypeConverter &typeConverter, RewritePatternSet &patterns); +/// Helper function for populating LLVM conversion patterns. If `op` implements +/// the `ConvertToLLVMOpInterface` interface, then the LLVM conversion pattern +/// attributes provided by the interface will be used to configure the +/// conversion target, type converter, and the pattern set. +void populateOpConvertToLLVMConversionPatterns(Operation *op, + ConversionTarget &target, + LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns); } // namespace mlir +#include "mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.h.inc" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.h.inc" + #endif // MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_H diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td new file mode 100644 index 0000000000000..1331a9802c570 --- /dev/null +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td @@ -0,0 +1,76 @@ + +//===- ToLLVMInterface.td - Conversion to LLVM interfaces -----*- tablegen -*-===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines interfaces for managing transformations, including populating +// pattern rewrites. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD +#define MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// Attribute interface +//===----------------------------------------------------------------------===// + +def ConvertToLLVMAttrInterface : + AttrInterface<"ConvertToLLVMAttrInterface"> { + let description = [{ + The `ConvertToLLVMAttrInterface` attribute interface allows using + attributes to configure the convert to LLVM infrastructure, this includes: + - The conversion target. + - The LLVM type converter. + - The pattern set. + + This interface permits fine-grained configuration of the `convert-to-llvm` + process. For example, attributes with target information like + `#nvvm.target` or `#rocdl.target` can leverage this interface for populating + patterns specific to a particular target. + }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Populate the dialect conversion target, type converter and pattern set. + }], + /*retTy=*/"void", + /*methodName=*/"populateConvertToLLVMConversionPatterns", + /*args=*/(ins "::mlir::ConversionTarget&":$target, + "::mlir::LLVMTypeConverter&":$typeConverter, + "::mlir::RewritePatternSet&":$patternSet)> + ]; +} + +//===----------------------------------------------------------------------===// +// Op interface +//===----------------------------------------------------------------------===// + +def ConvertToLLVMOpInterface : OpInterface<"ConvertToLLVMOpInterface"> { + let description = [{ + Interface for collecting all convert to LLVM attributes stored in an + operation. See `ConvertToLLVMAttrInterface` for more information on these + attributes.
+ }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Populate the provided vector with a list of convert to LLVM attributes + to apply. + }], + /*retTy=*/"void", + /*methodName=*/"getConvertToLLVMConversionAttrs", + /*args=*/(ins + "::llvm::SmallVectorImpl<::mlir::ConvertToLLVMAttrInterface>&":$attrs) + > + ]; +} + +#endif // MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h b/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h new file mode 100644 index 0000000000000..ad8c39fe67661 --- /dev/null +++ b/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h @@ -0,0 +1,25 @@ +//===- GPUToLLVM.h - Convert GPU to LLVM dialect ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares registration functions for converting GPU to LLVM. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H +#define MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H + +namespace mlir { +class DialectRegistry; +namespace gpu { +/// Registers the `ConvertToLLVMOpInterface` interface on the `gpu::GPUModuleOP` +/// operation.
+void registerConvertGpuToLLVMInterface(DialectRegistry &registry); +} // namespace gpu +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h new file mode 100644 index 0000000000000..6311630a23c8f --- /dev/null +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h @@ -0,0 +1,27 @@ +//===- GPUToNVVM.h - Convert GPU to NVVM dialect ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares registration functions for converting GPU to NVVM. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H +#define MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H + +namespace mlir { +class DialectRegistry; +namespace NVVM { +/// Registers the `ConvertToLLVMAttrInterface` interface on the +/// `NVVM::NVVMTargetAttr` attribute. This interface populates the conversion +/// target, LLVM type converter, and pattern set for converting GPU operations +/// to NVVM. +void registerConvertGpuToNVVMInterface(DialectRegistry &registry); +} // namespace NVVM +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h index 645e86a430962..fc7c967f1b62c 100644 --- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h @@ -31,6 +31,10 @@ LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type); /// Configure target to convert from the GPU dialect to NVVM.
void configureGpuToNVVMConversionLegality(ConversionTarget &target); +/// Configure the LLVM type converter to convert types and address spaces from the +/// GPU dialect to NVVM. +void configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter); + /// Collect a set of patterns to convert from the GPU dialect to NVVM. void populateGpuToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 587341a433860..f87b55791334f 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -22,12 +22,20 @@ def ConvertToLLVMPass : Pass<"convert-to-llvm"> { This is a generic pass to convert to LLVM, it uses the `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects the injection of conversion patterns. + + If `dynamic` is set to `true`, the pass will look for + `ConvertToLLVMAttrInterface` attributes and use them to further configure + the conversion process. This option also uses the `DataLayoutAnalysis` + analysis to configure the type converter. Enabling this option incurs + extra overhead. }]; let constructor = "mlir::createConvertToLLVMPass()"; let options = [ ListOption<"filterDialects", "filter-dialects", "std::string", "Test conversion patterns of only the specified dialects">, + Option<"useDynamic", "dynamic", "bool", "false", + "Use op conversion attributes to configure the conversion">, ]; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index 2ea589a7c4c3b..8b380751c2f9d 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -73,145 +73,6 @@ DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, "llvm.metadata"); #undef DEFINE_TRIVIAL_LLVM_TYPE -//===----------------------------------------------------------------------===// -// LLVMStructType. 
-//===----------------------------------------------------------------------===// - -/// LLVM dialect structure type representing a collection of different-typed -/// elements manipulated together. Structured can optionally be packed, meaning -/// that their elements immediately follow each other in memory without -/// accounting for potential alignment. -/// -/// Structure types can be identified (named) or literal. Literal structures -/// are uniquely represented by the list of types they contain and packedness. -/// Literal structure types are immutable after construction. -/// -/// Identified structures are uniquely represented by their name, a string. They -/// have a mutable component, consisting of the list of types they contain, -/// the packedness and the opacity bits. Identified structs can be created -/// without providing the lists of element types, making them suitable to -/// represent recursive, i.e. self-referring, structures. Identified structs -/// without body are considered opaque. For such structs, one can set the body. -/// Identified structs can be created as intentionally-opaque, implying that the -/// caller does not intend to ever set the body (e.g. forward-declarations of -/// structs from another module) and wants to disallow further modification of -/// the body. For intentionally-opaque structs or non-opaque structs with the -/// body, one is not allowed to set another body (however, one can set exactly -/// the same body). -/// -/// Note that the packedness of the struct takes place in uniquing of literal -/// structs, but does not in uniquing of identified structs. -class LLVMStructType - : public Type::TypeBase { -public: - /// Inherit base constructors. - using Base::Base; - - static constexpr StringLiteral name = "llvm.struct"; - - /// Checks if the given type can be contained in a structure type. 
- static bool isValidElementType(Type type); - - /// Gets or creates an identified struct with the given name in the provided - /// context. Note that unlike llvm::StructType::create, this function will - /// _NOT_ rename a struct in case a struct with the same name already exists - /// in the context. Instead, it will just return the existing struct, - /// similarly to the rest of MLIR type ::get methods. - static LLVMStructType getIdentified(MLIRContext *context, StringRef name); - static LLVMStructType - getIdentifiedChecked(function_ref emitError, - MLIRContext *context, StringRef name); - - /// Gets a new identified struct with the given body. The body _cannot_ be - /// changed later. If a struct with the given name already exists, renames - /// the struct by appending a `.` followed by a number to the name. Renaming - /// happens even if the existing struct has the same body. - static LLVMStructType getNewIdentified(MLIRContext *context, StringRef name, - ArrayRef elements, - bool isPacked = false); - - /// Gets or creates a literal struct with the given body in the provided - /// context. - static LLVMStructType getLiteral(MLIRContext *context, ArrayRef types, - bool isPacked = false); - static LLVMStructType - getLiteralChecked(function_ref emitError, - MLIRContext *context, ArrayRef types, - bool isPacked = false); - - /// Gets or creates an intentionally-opaque identified struct. Such a struct - /// cannot have its body set. To create an opaque struct with a mutable body, - /// use `getIdentified`. Note that unlike llvm::StructType::create, this - /// function will _NOT_ rename a struct in case a struct with the same name - /// already exists in the context. Instead, it will just return the existing - /// struct, similarly to the rest of MLIR type ::get methods. 
- static LLVMStructType getOpaque(StringRef name, MLIRContext *context); - static LLVMStructType - getOpaqueChecked(function_ref emitError, - MLIRContext *context, StringRef name); - - /// Set the body of an identified struct. Returns failure if the body could - /// not be set, e.g. if the struct already has a body or if it was marked as - /// intentionally opaque. This might happen in a multi-threaded context when a - /// different thread modified the struct after it was created. Most callers - /// are likely to assert this always succeeds, but it is possible to implement - /// a local renaming scheme based on the result of this call. - LogicalResult setBody(ArrayRef types, bool isPacked); - - /// Checks if a struct is packed. - bool isPacked() const; - - /// Checks if a struct is identified. - bool isIdentified() const; - - /// Checks if a struct is opaque. - bool isOpaque(); - - /// Checks if a struct is initialized. - bool isInitialized(); - - /// Returns the name of an identified struct. - StringRef getName(); - - /// Returns the list of element types contained in a non-opaque struct. - ArrayRef getBody() const; - - /// Verifies that the type about to be constructed is well-formed. - static LogicalResult - verifyInvariants(function_ref emitError, StringRef, - bool); - static LogicalResult - verifyInvariants(function_ref emitError, - ArrayRef types, bool); - using Base::verifyInvariants; - - /// Hooks for DataLayoutTypeInterface. Should not be called directly. Obtain a - /// DataLayout instance and query it instead. 
- llvm::TypeSize getTypeSizeInBits(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const; - - uint64_t getABIAlignment(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const; - - uint64_t getPreferredAlignment(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const; - - bool areCompatible(DataLayoutEntryListRef oldLayout, - DataLayoutEntryListRef newLayout) const; - - LogicalResult verifyEntries(DataLayoutEntryListRef entries, - Location loc) const; - - /// Destructs the struct into its indexed field types. - std::optional> getSubelementIndexMap(); - - /// Returns which type is stored at a given integer index within the struct. - Type getTypeAtIndex(Attribute index); -}; - //===----------------------------------------------------------------------===// // Printing and parsing. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td index 09dd0919c318f..e88139fa5b28d 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td @@ -117,6 +117,140 @@ def LLVMFunctionType : LLVMType<"LLVMFunction", "func"> { }]; } +//===----------------------------------------------------------------------===// +// LLVMStructType +//===----------------------------------------------------------------------===// + +def LLVMStructType : LLVMType<"LLVMStruct", "struct", [ + MutableType, + DeclareTypeInterfaceMethods, + DeclareTypeInterfaceMethods +]> { + let summary = "LLVM struct type"; + + let description = [{ + LLVM dialect structure type representing a collection of different-typed + elements manipulated together. Struct types can optionally be packed, meaning + that their elements immediately follow each other in memory without + accounting for potential alignment. + + Structure types can be identified (named) or literal. 
Literal structures + are uniquely represented by the list of types they contain and packedness. + Literal structure types are immutable after construction. + + Identified structures are uniquely represented by their name, a string. They + have a mutable component, consisting of the list of types they contain, + the packedness and the opacity bits. Identified structs can be created + without providing the lists of element types, making them suitable to + represent recursive, i.e. self-referring, structures. Identified structs + without body are considered opaque. For such structs, one can set the body. + Identified structs can be created as intentionally-opaque, implying that the + caller does not intend to ever set the body (e.g. forward-declarations of + structs from another module) and wants to disallow further modification of + the body. For intentionally-opaque structs or non-opaque structs with the + body, one is not allowed to set another body (however, one can set exactly + the same body). + + Note that the packedness of the struct takes place in uniquing of literal + structs, but does not in uniquing of identified structs. + }]; + + // Specify parameters for which TableGen can generate convenient getters for + // us. + // TODO: Other parameters such as 'packed' or 'opaque' could be added in the + // future iff they generate getters prefixed with 'is', instead of + // 'get'. Until then there are no advantages in doing so. + let parameters = (ins + StringRefParameter<"struct name", [{""}]>:$name, + OptionalArrayRefParameter<"mlir::Type">:$body + ); + + // A custom storage class defined in C++ is required to implement mutability. + let storageClass = "LLVMStructTypeStorage"; + let genStorageClass = 0; + + // We want users to use the more aptly named custom builders below. + let skipDefaultBuilders = 1; + + let extraClassDeclaration = [{ + /// Checks if the given type can be contained in a structure type. 
+ static bool isValidElementType(Type type); + + /// Gets or creates an identified struct with the given name in the provided + /// context. Note that unlike llvm::StructType::create, this function will + /// _NOT_ rename a struct in case a struct with the same name already exists + /// in the context. Instead, it will just return the existing struct, + /// similarly to the rest of MLIR type ::get methods. + static LLVMStructType getIdentified(MLIRContext *context, StringRef name); + static LLVMStructType + getIdentifiedChecked(function_ref emitError, + MLIRContext *context, StringRef name); + + /// Gets a new identified struct with the given body. The body _cannot_ be + /// changed later. If a struct with the given name already exists, renames + /// the struct by appending a `.` followed by a number to the name. Renaming + /// happens even if the existing struct has the same body. + static LLVMStructType getNewIdentified(MLIRContext *context, StringRef name, + ArrayRef elements, + bool isPacked = false); + + /// Gets or creates a literal struct with the given body in the provided + /// context. + static LLVMStructType getLiteral(MLIRContext *context, ArrayRef types, + bool isPacked = false); + + static LLVMStructType + getLiteralChecked(function_ref emitError, + MLIRContext *context, ArrayRef types, + bool isPacked = false); + + /// Gets or creates an intentionally-opaque identified struct. Such a struct + /// cannot have its body set. + /// Note that unlike llvm::StructType::create, this function will _NOT_ + /// rename a struct in case a struct with the same name + /// already exists in the context. Instead, it will just return the existing + /// struct, similarly to the rest of MLIR type ::get methods. + static LLVMStructType getOpaque(StringRef name, MLIRContext *context); + + static LLVMStructType + getOpaqueChecked(function_ref emitError, + MLIRContext *context, StringRef name); + + /// Set the body of an identified struct. 
Returns failure if the body could + /// not be set, e.g. if the struct already has a body or if it was marked as + /// intentionally opaque. This might happen in a multi-threaded context when a + /// different thread modified the struct after it was created. Most callers + /// are likely to assert this always succeeds, but it is possible to implement + /// a local renaming scheme based on the result of this call. + LogicalResult setBody(ArrayRef types, bool isPacked); + + /// Checks if a struct is packed. + bool isPacked() const; + + /// Checks if a struct is identified. + bool isIdentified() const; + + /// Checks if a struct is opaque. + bool isOpaque(); + + /// Checks if a struct is initialized. + bool isInitialized(); + + /// Verifies that the type about to be constructed is well-formed. + static LogicalResult + verifyInvariants(function_ref emitError, StringRef, + bool); + static LogicalResult + verifyInvariants(function_ref emitError, + ArrayRef types, bool); + using Base::verifyInvariants; + }]; + + let hasCustomAssemblyFormat = 1; +} + //===----------------------------------------------------------------------===// // LLVMPointerType //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td index de7be3f21f3b1..aad50175546a5 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td @@ -46,18 +46,13 @@ def SPIRV_AccessChainOp : SPIRV_Op<"AccessChain", [Pure]> { - must be an OpConstant when indexing into a structure. 
- ``` - access-chain-op ::= ssa-id `=` `spirv.AccessChain` ssa-use - `[` ssa-use (',' ssa-use)* `]` - `:` pointer-type - ``` #### Example: ```mlir %0 = "spirv.Constant"() { value = 1: i32} : () -> i32 %1 = spirv.Variable : !spirv.ptr>, Function> - %2 = spirv.AccessChain %1[%0] : !spirv.ptr>, Function> + %2 = spirv.AccessChain %1[%0] : !spirv.ptr>, Function> -> !spirv.ptr, Function> %3 = spirv.Load "Function" %2 ["Volatile"] : !spirv.array<4xf32> ``` }]; @@ -149,17 +144,11 @@ def SPIRV_InBoundsPtrAccessChainOp : SPIRV_Op<"InBoundsPtrAccessChain", [Pure]> - ``` - access-chain-op ::= ssa-id `=` `spirv.InBoundsPtrAccessChain` ssa-use - `[` ssa-use (',' ssa-use)* `]` - `:` pointer-type - ``` - #### Example: ```mlir func @inbounds_ptr_access_chain(%arg0: !spirv.ptr, %arg1 : i64) -> () { - %0 = spirv.InBoundsPtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 + %0 = spirv.InBoundsPtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 -> !spirv.ptr ... } ``` @@ -183,6 +172,12 @@ def SPIRV_InBoundsPtrAccessChainOp : SPIRV_Op<"InBoundsPtrAccessChain", [Pure]> ); let builders = [OpBuilder<(ins "Value":$basePtr, "Value":$element, "ValueRange":$indices)>]; + + let hasCustomAssemblyFormat = 0; + + let assemblyFormat = [{ + $base_ptr `[` $element ($indices^)? `]` attr-dict `:` type($base_ptr) `,` type($element) (`,` type($indices)^)? `->` type($result) + }]; } // ----- @@ -275,17 +270,11 @@ def SPIRV_PtrAccessChainOp : SPIRV_Op<"PtrAccessChain", [Pure]> { - ``` - [access-chain-op ::= ssa-id `=` `spirv.PtrAccessChain` ssa-use - `[` ssa-use (',' ssa-use)* `]` - `:` pointer-type - ``` - #### Example: ```mlir func @ptr_access_chain(%arg0: !spirv.ptr, %arg1 : i64) -> () { - %0 = spirv.PtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 + %0 = spirv.PtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 -> !spirv.ptr ... 
} ``` @@ -311,6 +300,12 @@ def SPIRV_PtrAccessChainOp : SPIRV_Op<"PtrAccessChain", [Pure]> { ); let builders = [OpBuilder<(ins "Value":$basePtr, "Value":$element, "ValueRange":$indices)>]; + + let hasCustomAssemblyFormat = 0; + + let assemblyFormat = [{ + $base_ptr `[` $element ($indices^)? `]` attr-dict `:` type($base_ptr) `,` type($element) (`,` type($indices)^)? `->` type($result) + }]; } // ----- diff --git a/mlir/include/mlir/IR/AttrTypeBase.td b/mlir/include/mlir/IR/AttrTypeBase.td index cbe4f0d67574b..38d38cf098df3 100644 --- a/mlir/include/mlir/IR/AttrTypeBase.td +++ b/mlir/include/mlir/IR/AttrTypeBase.td @@ -56,6 +56,9 @@ class ParamNativeTypeTrait class GenInternalTypeTrait : GenInternalTrait; class PredTypeTrait : PredTrait; +// Trait required to be added to any type which is mutable. +def MutableType : NativeTypeTrait<"IsMutable">; + //===----------------------------------------------------------------------===// // Builders //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index 6fb71ccefda15..daea2a23d6fbe 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -19,6 +19,7 @@ class AffineExpr; class IRMapping; class UnknownLoc; class FileLineColLoc; +class FileLineColRange; class Type; class PrimitiveType; class IntegerType; diff --git a/mlir/include/mlir/IR/BuiltinDialectBytecode.td b/mlir/include/mlir/IR/BuiltinDialectBytecode.td index f50b5dd7ad822..87da8fd3568fa 100644 --- a/mlir/include/mlir/IR/BuiltinDialectBytecode.td +++ b/mlir/include/mlir/IR/BuiltinDialectBytecode.td @@ -95,11 +95,26 @@ def CallSiteLoc : DialectAttribute<(attr LocationAttr:$caller )>; +let cType = "FileLineColRange" in { +def FileLineColRange : DialectAttribute<(attr + StringAttr:$filename, + WithBuilder<"$_args", + WithType<"SmallVector", + WithParser<"succeeded(readFileLineColRangeLocs($_reader, $_var))", + 
WithPrinter<"writeFileLineColRangeLocs($_writer, $_name)">>>>:$rawLocData +)> { + let cBuilder = "getFileLineColRange(context, filename, rawLocData)"; + let printerPredicate = "!::llvm::isa($_val)"; +} + def FileLineColLoc : DialectAttribute<(attr StringAttr:$filename, - VarInt:$line, - VarInt:$column -)>; + VarInt:$start_line, + VarInt:$start_column +)> { + let printerPredicate = "::llvm::isa($_val)"; +} +} let cType = "FusedLoc", cBuilder = "cast(get(context, $_args))" in { @@ -321,7 +336,8 @@ def BuiltinDialectAttributes : DialectAttributes<"Builtin"> { DenseIntOrFPElementsAttr, DenseStringElementsAttr, SparseElementsAttr, - DistinctAttr + DistinctAttr, + FileLineColRange, ]; } diff --git a/mlir/include/mlir/IR/BuiltinLocationAttributes.td b/mlir/include/mlir/IR/BuiltinLocationAttributes.td index bbe566ce97777..fe4e61100872f 100644 --- a/mlir/include/mlir/IR/BuiltinLocationAttributes.td +++ b/mlir/include/mlir/IR/BuiltinLocationAttributes.td @@ -60,46 +60,98 @@ def CallSiteLoc : Builtin_LocationAttr<"CallSiteLoc"> { } //===----------------------------------------------------------------------===// -// FileLineColLoc +// FileLineColRange //===----------------------------------------------------------------------===// -def FileLineColLoc : Builtin_LocationAttr<"FileLineColLoc"> { - let summary = "A file:line:column source location"; +def FileLineColRange : Builtin_LocationAttr<"FileLineColRange"> { + let summary = "A file:line:column source location range"; let description = [{ Syntax: ``` filelinecol-location ::= string-literal `:` integer-literal `:` integer-literal + (`to` (integer-literal ?) `:` integer-literal ?) ``` - An instance of this location represents a tuple of file, line number, and - column number. This is similar to the type of location that you get from - most source languages. + An instance of this location represents a tuple of file, start and end line + number, and start and end column number. 
It allows for the following + configurations: + + * A single file line location: `file:line`; + * A single file line col location: `file:line:column`; + * A single line range: `file:line:column to :column`; + * A single file range: `file:line:column to line:column`; Example: ```mlir - loc("mysource.cc":10:8) + loc("mysource.cc":10:8 to 12:18) ``` }]; - let parameters = (ins "StringAttr":$filename, "unsigned":$line, - "unsigned":$column); + + let parameters = (ins "StringAttr":$filename, + "unsigned":$start_line, "unsigned":$start_column, + "unsigned":$end_line, "unsigned":$end_column); let builders = [ + AttrBuilderWithInferredContext<(ins "StringAttr":$filename), [{ + return $_get(filename.getContext(), filename, ArrayRef{}); + }]>, + AttrBuilderWithInferredContext<(ins "StringAttr":$filename, + "unsigned":$line), [{ + return $_get(filename.getContext(), filename, + ArrayRef{line}); + }]>, AttrBuilderWithInferredContext<(ins "StringAttr":$filename, "unsigned":$line, "unsigned":$column), [{ - return $_get(filename.getContext(), filename, line, column); + return $_get(filename.getContext(), filename, + ArrayRef{line, column}); }]>, - AttrBuilder<(ins "StringRef":$filename, "unsigned":$line, - "unsigned":$column), [{ + AttrBuilder<(ins "::llvm::StringRef":$filename, + "unsigned":$start_line, + "unsigned":$start_column), [{ return $_get($_ctxt, - StringAttr::get($_ctxt, filename.empty() ? "-" : filename), - line, column); - }]> + StringAttr::get($_ctxt, filename.empty() ? 
"-" : filename), + ArrayRef{start_line, start_column}); + }]>, + AttrBuilderWithInferredContext<(ins "::mlir::StringAttr":$filename, + "unsigned":$line, + "unsigned":$start_column, + "unsigned":$end_column), [{ + return $_get(filename.getContext(), filename, + ArrayRef{line, start_column, end_column}); + }]>, + AttrBuilderWithInferredContext<(ins "::mlir::StringAttr":$filename, + "unsigned":$start_line, + "unsigned":$start_column, + "unsigned":$end_line, + "unsigned":$end_column), [{ + return $_get(filename.getContext(), filename, + ArrayRef{start_line, start_column, end_column, end_line}); + }]>, + AttrBuilder<(ins "::llvm::StringRef":$filename, + "unsigned":$start_line, + "unsigned":$start_column, + "unsigned":$end_line, + "unsigned":$end_column), [{ + return $_get($_ctxt, + StringAttr::get($_ctxt, filename.empty() ? "-" : filename), + ArrayRef{start_line, start_column, end_column, end_line}); + }]>, ]; + + let extraClassDeclaration = [{ + ::mlir::StringAttr getFilename() const; + unsigned getStartLine() const; + unsigned getStartColumn() const; + unsigned getEndColumn() const; + unsigned getEndLine() const; + }]; let skipDefaultBuilders = 1; - let attrName = "builtin.file_line_loc"; + let genAccessors = 0; + let genStorageClass = 0; + let attrName = "builtin.file_line_range"; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Location.h b/mlir/include/mlir/IR/Location.h index 5eb1bfaf4afcd..e206501f5ee6a 100644 --- a/mlir/include/mlir/IR/Location.h +++ b/mlir/include/mlir/IR/Location.h @@ -136,6 +136,11 @@ inline ::llvm::hash_code hash_value(Location arg) { // Tablegen Attribute Declarations //===----------------------------------------------------------------------===// +// Forward declaration for class created later. 
+namespace mlir::detail { +struct FileLineColRangeAttrStorage; +} // namespace mlir::detail + #define GET_ATTRDEF_CLASSES #include "mlir/IR/BuiltinLocationAttributes.h.inc" @@ -164,6 +169,32 @@ class FusedLocWith : public FusedLoc { } }; +//===----------------------------------------------------------------------===// +// FileLineColLoc +//===----------------------------------------------------------------------===// + +/// An instance of this location represents a tuple of file, line number, and +/// column number. This is similar to the type of location that you get from +/// most source languages. +/// +/// FileLineColLoc is a FileLineColRange with exactly one line and column. +class FileLineColLoc : public FileLineColRange { +public: + using FileLineColRange::FileLineColRange; + + static FileLineColLoc get(StringAttr filename, unsigned line, + unsigned column); + static FileLineColLoc get(MLIRContext *context, StringRef fileName, + unsigned line, unsigned column); + + StringAttr getFilename() const; + unsigned getLine() const; + unsigned getColumn() const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast. 
+ static bool classof(Attribute attr); +}; + //===----------------------------------------------------------------------===// // OpaqueLoc //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index 1f2ef26b45070..14a6a2787b3a5 100644 --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -18,6 +18,8 @@ #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/GPUCommon/GPUToLLVM.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h" #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" @@ -72,6 +74,8 @@ inline void registerAllExtensions(DialectRegistry &registry) { registerConvertOpenMPToLLVMInterface(registry); ub::registerConvertUBToLLVMInterface(registry); registerConvertAMXToLLVMInterface(registry); + gpu::registerConvertGpuToLLVMInterface(registry); + NVVM::registerConvertGpuToNVVMInterface(registry); // Register all transform dialect extensions. affine::registerTransformDialectExtension(registry); diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td index 68d4bf7c17e18..b75fc5e806afb 100644 --- a/mlir/include/mlir/Interfaces/TilingInterface.td +++ b/mlir/include/mlir/Interfaces/TilingInterface.td @@ -66,7 +66,7 @@ def TilingInterface : OpInterface<"TilingInterface"> { /*desc=*/[{ Returns a list of iterator types that describe the number of loops. 
}], - /*retType=*/"::mlir::SmallVector", + /*retType=*/"::mlir::SmallVector<::mlir::utils::IteratorType>", /*methodName=*/"getLoopIteratorTypes", /*args=*/(ins), /*methodBody=*/"", @@ -353,8 +353,8 @@ def TilingInterface : OpInterface<"TilingInterface"> { /*methodName=*/"generateScalarImplementation", /*args=*/(ins "::mlir::OpBuilder &":$b, - "Location ":$loc, - "ValueRange ":$ivs), + "::mlir::Location ":$loc, + "::mlir::ValueRange ":$ivs), /*methodBody=*/"", /*defaultImplementation=*/[{ return failure(); diff --git a/mlir/lib/AsmParser/LocationParser.cpp b/mlir/lib/AsmParser/LocationParser.cpp index 1365da03c7c3d..fb0999bed201d 100644 --- a/mlir/lib/AsmParser/LocationParser.cpp +++ b/mlir/lib/AsmParser/LocationParser.cpp @@ -12,6 +12,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Location.h" #include "mlir/Support/LLVM.h" +#include using namespace mlir; using namespace mlir::detail; @@ -97,37 +98,82 @@ ParseResult Parser::parseFusedLocation(LocationAttr &loc) { return success(); } -ParseResult Parser::parseNameOrFileLineColLocation(LocationAttr &loc) { +ParseResult Parser::parseNameOrFileLineColRange(LocationAttr &loc) { auto *ctx = getContext(); auto str = getToken().getStringValue(); consumeToken(Token::string); + std::optional startLine, startColumn, endLine, endColumn; + // If the next token is ':' this is a filelinecol location. if (consumeIf(Token::colon)) { // Parse the line number. if (getToken().isNot(Token::integer)) return emitWrongTokenError( - "expected integer line number in FileLineColLoc"); - auto line = getToken().getUnsignedIntegerValue(); - if (!line) + "expected integer line number in FileLineColRange"); + startLine = getToken().getUnsignedIntegerValue(); + if (!startLine) return emitWrongTokenError( - "expected integer line number in FileLineColLoc"); + "expected integer line number in FileLineColRange"); consumeToken(Token::integer); // Parse the ':'. 
- if (parseToken(Token::colon, "expected ':' in FileLineColLoc")) - return failure(); + if (getToken().isNot(Token::colon)) { + loc = FileLineColRange::get(StringAttr::get(ctx, str), *startLine); + return success(); + } + consumeToken(Token::colon); // Parse the column number. - if (getToken().isNot(Token::integer)) + if (getToken().isNot(Token::integer)) { + return emitWrongTokenError( + "expected integer column number in FileLineColRange"); + } + startColumn = getToken().getUnsignedIntegerValue(); + if (!startColumn.has_value()) + return emitError("expected integer column number in FileLineColRange"); + consumeToken(Token::integer); + + if (!isCurrentTokenAKeyword() || getTokenSpelling() != "to") { + loc = FileLineColLoc::get(ctx, str, *startLine, *startColumn); + return success(); + } + consumeToken(); + + // Parse the line number. + if (getToken().is(Token::integer)) { + endLine = getToken().getUnsignedIntegerValue(); + if (!endLine) { + return emitWrongTokenError( + "expected integer line number in FileLineColRange"); + } + consumeToken(Token::integer); + } + + // Parse the ':'. + if (getToken().isNot(Token::colon)) { + return emitWrongTokenError( + "expected either integer or `:` post `to` in FileLineColRange"); + } + consumeToken(Token::colon); + + // Parse the column number. 
+ if (getToken().isNot(Token::integer)) { return emitWrongTokenError( - "expected integer column number in FileLineColLoc"); - auto column = getToken().getUnsignedIntegerValue(); - if (!column.has_value()) - return emitError("expected integer column number in FileLineColLoc"); + "expected integer column number in FileLineColRange"); + } + endColumn = getToken().getUnsignedIntegerValue(); + if (!endColumn.has_value()) + return emitError("expected integer column number in FileLineColRange"); consumeToken(Token::integer); - loc = FileLineColLoc::get(ctx, str, *line, *column); + if (endLine.has_value()) { + loc = FileLineColRange::get(StringAttr::get(ctx, str), *startLine, + *startColumn, *endLine, *endColumn); + } else { + loc = FileLineColRange::get(StringAttr::get(ctx, str), *startLine, + *startColumn, *endColumn); + } return success(); } @@ -166,7 +212,7 @@ ParseResult Parser::parseLocationInstance(LocationAttr &loc) { // Handle either name or filelinecol locations. if (getToken().is(Token::string)) - return parseNameOrFileLineColLocation(loc); + return parseNameOrFileLineColRange(loc); // Bare tokens required for other cases. if (!getToken().is(Token::bare_identifier)) diff --git a/mlir/lib/AsmParser/Parser.h b/mlir/lib/AsmParser/Parser.h index 4979cfc6e69e4..37670bd789fec 100644 --- a/mlir/lib/AsmParser/Parser.h +++ b/mlir/lib/AsmParser/Parser.h @@ -310,7 +310,7 @@ class Parser { ParseResult parseFusedLocation(LocationAttr &loc); /// Parse a name or FileLineCol location instance. 
- ParseResult parseNameOrFileLineColLocation(LocationAttr &loc); + ParseResult parseNameOrFileLineColRange(LocationAttr &loc); //===--------------------------------------------------------------------===// // Affine Parsing diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index ee50bf6e45d54..f05f9f02e50fa 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -1102,11 +1102,11 @@ class PyDenseElementsAttribute unpackedBooleans = unpackedBooleans[py::slice(0, numBooleans, 1)]; unpackedBooleans = equalFunc(unpackedBooleans, 1); - std::vector shape; MlirType shapedType = mlirAttributeGetType(*this); intptr_t rank = mlirShapedTypeGetRank(shapedType); + std::vector shape(rank); for (intptr_t i = 0; i < rank; ++i) { - shape.push_back(mlirShapedTypeGetDimSize(shapedType, i)); + shape[i] = mlirShapedTypeGetDimSize(shapedType, i); } unpackedBooleans = reshapeFunc(unpackedBooleans, shape); diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index 854ecf69d78c2..b2cdcb9d4bb24 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -76,14 +76,21 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { "enable_ir_printing", [](PyPassManager &passManager, bool printBeforeAll, bool printAfterAll, bool printModuleScope, bool printAfterChange, - bool printAfterFailure) { + bool printAfterFailure, + std::optional optionalTreePrintingPath) { + std::string treePrintingPath = ""; + if (optionalTreePrintingPath.has_value()) + treePrintingPath = optionalTreePrintingPath.value(); mlirPassManagerEnableIRPrinting( passManager.get(), printBeforeAll, printAfterAll, - printModuleScope, printAfterChange, printAfterFailure); + printModuleScope, printAfterChange, printAfterFailure, + mlirStringRefCreate(treePrintingPath.data(), + treePrintingPath.size())); }, "print_before_all"_a = false, "print_after_all"_a = true, 
"print_module_scope"_a = false, "print_after_change"_a = false, "print_after_failure"_a = false, + "tree_printing_dir_path"_a = py::none(), "Enable IR printing, default as mlir-print-ir-after-all.") .def( "enable_reproducer_before_all", diff --git a/mlir/lib/CAPI/IR/Pass.cpp b/mlir/lib/CAPI/IR/Pass.cpp index 37c9a1dfa6b66..b8106d3dcae1c 100644 --- a/mlir/lib/CAPI/IR/Pass.cpp +++ b/mlir/lib/CAPI/IR/Pass.cpp @@ -48,17 +48,25 @@ void mlirPassManagerEnableIRPrinting(MlirPassManager passManager, bool printBeforeAll, bool printAfterAll, bool printModuleScope, bool printAfterOnlyOnChange, - bool printAfterOnlyOnFailure) { + bool printAfterOnlyOnFailure, + MlirStringRef treePrintingPath) { auto shouldPrintBeforePass = [printBeforeAll](Pass *, Operation *) { return printBeforeAll; }; auto shouldPrintAfterPass = [printAfterAll](Pass *, Operation *) { return printAfterAll; }; - return unwrap(passManager) - ->enableIRPrinting(shouldPrintBeforePass, shouldPrintAfterPass, - printModuleScope, printAfterOnlyOnChange, - printAfterOnlyOnFailure); + if (unwrap(treePrintingPath).empty()) + return unwrap(passManager) + ->enableIRPrinting(shouldPrintBeforePass, shouldPrintAfterPass, + printModuleScope, printAfterOnlyOnChange, + printAfterOnlyOnFailure); + + unwrap(passManager) + ->enableIRPrintingToFileTree(shouldPrintBeforePass, shouldPrintAfterPass, + printModuleScope, printAfterOnlyOnChange, + printAfterOnlyOnFailure, + unwrap(treePrintingPath)); } void mlirPassManagerEnableReproducerBeforeAll(MlirPassManager passManager, diff --git a/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt index de3d850d520c0..c71711ba2ebed 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_conversion_library(MLIRConvertToLLVMInterface ToLLVMInterface.cpp DEPENDS + MLIRConvertToLLVMInterfaceIncGen LINK_LIBS PUBLIC MLIRIR @@ -21,6 +22,7 @@ 
add_mlir_conversion_library(MLIRConvertToLLVMPass LINK_LIBS PUBLIC MLIRIR + MLIRConvertToLLVMInterface MLIRLLVMCommonConversion MLIRLLVMDialect MLIRPass diff --git a/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp b/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp index b2407a258c271..673ba814d338f 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp +++ b/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" @@ -27,6 +28,41 @@ namespace mlir { using namespace mlir; namespace { +/// Base class for creating the internal implementation of `convert-to-llvm` +/// passes. +class ConvertToLLVMPassInterface { +public: + ConvertToLLVMPassInterface(MLIRContext *context, + ArrayRef filterDialects); + virtual ~ConvertToLLVMPassInterface() = default; + + /// Get the dependent dialects used by `convert-to-llvm`. + static void getDependentDialects(DialectRegistry ®istry); + + /// Initialize the internal state of the `convert-to-llvm` pass + /// implementation. This method is invoked by `ConvertToLLVMPass::initialize`. + /// This method returns whether the initialization process failed. + virtual LogicalResult initialize() = 0; + + /// Transform `op` to LLVM with the conversions available in the pass. The + /// analysis manager can be used to query analyzes like `DataLayoutAnalysis` + /// to further configure the conversion process. This method is invoked by + /// `ConvertToLLVMPass::runOnOperation`. This method returns whether the + /// transformation process failed. 
+ virtual LogicalResult transform(Operation *op, + AnalysisManager manager) const = 0; + +protected: + /// Visit the `ConvertToLLVMPatternInterface` dialect interfaces and call + /// `visitor` with each of the interfaces. If `filterDialects` is non-empty, + /// then `visitor` is invoked only with the dialects in the `filterDialects` + /// list. + LogicalResult visitInterfaces( + llvm::function_ref visitor); + MLIRContext *context; + /// List of dialects names to use as filters. + ArrayRef filterDialects; +}; /// This DialectExtension can be attached to the context, which will invoke the /// `apply()` method for every loaded dialect. If a dialect implements the @@ -58,74 +94,188 @@ class LoadDependentDialectExtension : public DialectExtensionBase { } }; +//===----------------------------------------------------------------------===// +// StaticConvertToLLVM +//===----------------------------------------------------------------------===// + +/// Static implementation of the `convert-to-llvm` pass. This version only looks +/// at dialect interfaces to configure the conversion process. +struct StaticConvertToLLVM : public ConvertToLLVMPassInterface { + /// Pattern set with conversions to LLVM. + std::shared_ptr patterns; + /// The conversion target. + std::shared_ptr target; + /// The LLVM type converter. + std::shared_ptr typeConverter; + using ConvertToLLVMPassInterface::ConvertToLLVMPassInterface; + + /// Configure the conversion to LLVM at pass initialization. + LogicalResult initialize() final { + auto target = std::make_shared(*context); + auto typeConverter = std::make_shared(context); + RewritePatternSet tempPatterns(context); + target->addLegalDialect(); + // Populate the patterns with the dialect interface. 
+ if (failed(visitInterfaces([&](ConvertToLLVMPatternInterface *iface) { + iface->populateConvertToLLVMConversionPatterns( + *target, *typeConverter, tempPatterns); + }))) + return failure(); + this->patterns = + std::make_unique(std::move(tempPatterns)); + this->target = target; + this->typeConverter = typeConverter; + return success(); + } + + /// Apply the conversion driver. + LogicalResult transform(Operation *op, AnalysisManager manager) const final { + if (failed(applyPartialConversion(op, *target, *patterns))) + return failure(); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// DynamicConvertToLLVM +//===----------------------------------------------------------------------===// + +/// Dynamic implementation of the `convert-to-llvm` pass. This version inspects +/// the IR to configure the conversion to LLVM. +struct DynamicConvertToLLVM : public ConvertToLLVMPassInterface { + /// A list of all the `ConvertToLLVMPatternInterface` dialect interfaces used + /// to partially configure the conversion process. + std::shared_ptr> + interfaces; + using ConvertToLLVMPassInterface::ConvertToLLVMPassInterface; + + /// Collect the dialect interfaces used to configure the conversion process. + LogicalResult initialize() final { + auto interfaces = + std::make_shared>(); + // Collect the interfaces. + if (failed(visitInterfaces([&](ConvertToLLVMPatternInterface *iface) { + interfaces->push_back(iface); + }))) + return failure(); + this->interfaces = interfaces; + return success(); + } + + /// Configure the conversion process and apply the conversion driver. + LogicalResult transform(Operation *op, AnalysisManager manager) const final { + RewritePatternSet patterns(context); + ConversionTarget target(*context); + target.addLegalDialect(); + // Get the data layout analysis. 
+ const auto &dlAnalysis = manager.getAnalysis(); + LLVMTypeConverter typeConverter(context, &dlAnalysis); + + // Configure the conversion with dialect level interfaces. + for (ConvertToLLVMPatternInterface *iface : *interfaces) + iface->populateConvertToLLVMConversionPatterns(target, typeConverter, + patterns); + + // Configure the conversion attribute interfaces. + populateOpConvertToLLVMConversionPatterns(op, target, typeConverter, + patterns); + + // Apply the conversion. + if (failed(applyPartialConversion(op, target, std::move(patterns)))) + return failure(); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ConvertToLLVMPass +//===----------------------------------------------------------------------===// + /// This is a generic pass to convert to LLVM, it uses the /// `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects /// the injection of conversion patterns. class ConvertToLLVMPass : public impl::ConvertToLLVMPassBase { - std::shared_ptr patterns; - std::shared_ptr target; - std::shared_ptr typeConverter; + std::shared_ptr impl; public: using impl::ConvertToLLVMPassBase::ConvertToLLVMPassBase; void getDependentDialects(DialectRegistry ®istry) const final { - registry.insert(); - registry.addExtensions(); + ConvertToLLVMPassInterface::getDependentDialects(registry); } LogicalResult initialize(MLIRContext *context) final { - RewritePatternSet tempPatterns(context); - auto target = std::make_shared(*context); - target->addLegalDialect(); - auto typeConverter = std::make_shared(context); - - if (!filterDialects.empty()) { - // Test mode: Populate only patterns from the specified dialects. Produce - // an error if the dialect is not loaded or does not implement the - // interface. 
- for (std::string &dialectName : filterDialects) { - Dialect *dialect = context->getLoadedDialect(dialectName); - if (!dialect) - return emitError(UnknownLoc::get(context)) - << "dialect not loaded: " << dialectName << "\n"; - auto *iface = dyn_cast(dialect); - if (!iface) - return emitError(UnknownLoc::get(context)) - << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName << "\n"; - iface->populateConvertToLLVMConversionPatterns(*target, *typeConverter, - tempPatterns); - } - } else { - // Normal mode: Populate all patterns from all dialects that implement the - // interface. - for (Dialect *dialect : context->getLoadedDialects()) { - // First time we encounter this dialect: if it implements the interface, - // let's populate patterns ! - auto *iface = dyn_cast(dialect); - if (!iface) - continue; - iface->populateConvertToLLVMConversionPatterns(*target, *typeConverter, - tempPatterns); - } - } - - this->patterns = - std::make_unique(std::move(tempPatterns)); - this->target = target; - this->typeConverter = typeConverter; + std::shared_ptr impl; + // Choose the pass implementation. 
+ if (useDynamic) + impl = std::make_shared(context, filterDialects); + else + impl = std::make_shared(context, filterDialects); + if (failed(impl->initialize())) + return failure(); + this->impl = impl; return success(); } void runOnOperation() final { - if (failed(applyPartialConversion(getOperation(), *target, *patterns))) - signalPassFailure(); + if (failed(impl->transform(getOperation(), getAnalysisManager()))) + return signalPassFailure(); } }; } // namespace +//===----------------------------------------------------------------------===// +// ConvertToLLVMPassInterface +//===----------------------------------------------------------------------===// + +ConvertToLLVMPassInterface::ConvertToLLVMPassInterface( + MLIRContext *context, ArrayRef filterDialects) + : context(context), filterDialects(filterDialects) {} + +void ConvertToLLVMPassInterface::getDependentDialects( + DialectRegistry ®istry) { + registry.insert(); + registry.addExtensions(); +} + +LogicalResult ConvertToLLVMPassInterface::visitInterfaces( + llvm::function_ref visitor) { + if (!filterDialects.empty()) { + // Test mode: Populate only patterns from the specified dialects. Produce + // an error if the dialect is not loaded or does not implement the + // interface. + for (StringRef dialectName : filterDialects) { + Dialect *dialect = context->getLoadedDialect(dialectName); + if (!dialect) + return emitError(UnknownLoc::get(context)) + << "dialect not loaded: " << dialectName << "\n"; + auto *iface = dyn_cast(dialect); + if (!iface) + return emitError(UnknownLoc::get(context)) + << "dialect does not implement ConvertToLLVMPatternInterface: " + << dialectName << "\n"; + visitor(iface); + } + } else { + // Normal mode: Populate all patterns from all dialects that implement the + // interface. + for (Dialect *dialect : context->getLoadedDialects()) { + // First time we encounter this dialect: if it implements the interface, + // let's populate patterns ! 
+ auto *iface = dyn_cast(dialect); + if (!iface) + continue; + visitor(iface); + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// API +//===----------------------------------------------------------------------===// + void mlir::registerConvertToLLVMDependentDialectLoading( DialectRegistry ®istry) { registry.addExtensions(); diff --git a/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp b/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp index 3a4e83b2a8838..252245dfbf541 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp +++ b/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp @@ -30,3 +30,22 @@ void mlir::populateConversionTargetFromOperation( patterns); }); } + +void mlir::populateOpConvertToLLVMConversionPatterns( + Operation *op, ConversionTarget &target, LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) { + auto iface = dyn_cast(op); + if (!iface) + iface = op->getParentOfType(); + if (!iface) + return; + SmallVector attrs; + iface.getConvertToLLVMConversionAttrs(attrs); + for (ConvertToLLVMAttrInterface attr : attrs) + attr.populateConvertToLLVMConversionPatterns(target, typeConverter, + patterns); +} + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.cpp.inc" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.cpp.inc" diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 92b28ff9c5873..1497d662dcdbd 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -22,6 +22,7 @@ #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/GPUCommon/GPUToLLVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include 
"mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" @@ -1762,3 +1763,34 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter); patterns.add(converter, kernelBarePtrCallConv); } + +//===----------------------------------------------------------------------===// +// GPUModuleOp convert to LLVM op interface +//===----------------------------------------------------------------------===// + +namespace { +struct GPUModuleOpConvertToLLVMInterface + : public ConvertToLLVMOpInterface::ExternalModel< + GPUModuleOpConvertToLLVMInterface, gpu::GPUModuleOp> { + /// Get the conversion patterns from the target attribute. + void getConvertToLLVMConversionAttrs( + Operation *op, SmallVectorImpl &attrs) const; +}; +} // namespace + +void GPUModuleOpConvertToLLVMInterface::getConvertToLLVMConversionAttrs( + Operation *op, SmallVectorImpl &attrs) const { + auto module = cast(op); + ArrayAttr targetsAttr = module.getTargetsAttr(); + // Fail if there are no target attributes or there is more than one target. 
+ if (!targetsAttr || targetsAttr.size() != 1) + return; + if (auto patternAttr = dyn_cast(targetsAttr[0])) + attrs.push_back(patternAttr); +} + +void mlir::gpu::registerConvertGpuToLLVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) { + gpu::GPUModuleOp::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 04e85c2b337de..b343cf71e3a2e 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -15,8 +15,10 @@ #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" @@ -274,29 +276,7 @@ struct LowerGpuOpsToNVVMOpsPass } LLVMTypeConverter converter(m.getContext(), options); - // NVVM uses alloca in the default address space to represent private - // memory allocations, so drop private annotations. NVVM uses address - // space 3 for shared memory. NVVM uses the default address space to - // represent global memory. 
- populateGpuMemorySpaceAttributeConversions( - converter, [](gpu::AddressSpace space) -> unsigned { - switch (space) { - case gpu::AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); - case gpu::AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); - case gpu::AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return 0; - }); - // Lowering for MMAMatrixType. - converter.addConversion([&](gpu::MMAMatrixType type) -> Type { - return convertMMAToLLVMType(type); - }); + configureGpuToNVVMTypeConverter(converter); RewritePatternSet llvmPatterns(m.getContext()); arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns); @@ -332,6 +312,32 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { target.addLegalOp(); } +void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) { + // NVVM uses alloca in the default address space to represent private + // memory allocations, so drop private annotations. NVVM uses address + // space 3 for shared memory. NVVM uses the default address space to + // represent global memory. + populateGpuMemorySpaceAttributeConversions( + converter, [](gpu::AddressSpace space) -> unsigned { + switch (space) { + case gpu::AddressSpace::Global: + return static_cast( + NVVM::NVVMMemorySpace::kGlobalMemorySpace); + case gpu::AddressSpace::Workgroup: + return static_cast( + NVVM::NVVMMemorySpace::kSharedMemorySpace); + case gpu::AddressSpace::Private: + return 0; + } + llvm_unreachable("unknown address space enum value"); + return 0; + }); + // Lowering for MMAMatrixType. 
+ converter.addConversion([&](gpu::MMAMatrixType type) -> Type { + return convertMMAToLLVMType(type); + }); +} + template static void populateOpPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, StringRef f32Func, @@ -467,3 +473,34 @@ void mlir::populateGpuToNVVMConversionPatterns( populateOpPatterns(converter, patterns, "__nv_tanhf", "__nv_tanh"); } + +//===----------------------------------------------------------------------===// +// NVVMTargetAttr convert to LLVM attr interface +//===----------------------------------------------------------------------===// + +namespace { +struct NVVMTargetConvertToLLVMAttrInterface + : public ConvertToLLVMAttrInterface::ExternalModel< + NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> { + /// Configure GPU to NVVM. + void populateConvertToLLVMConversionPatterns( + Attribute attr, ConversionTarget &target, + LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const; +}; +} // namespace + +void NVVMTargetConvertToLLVMAttrInterface:: + populateConvertToLLVMConversionPatterns(Attribute attr, + ConversionTarget &target, + LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) const { + configureGpuToNVVMConversionLegality(target); + configureGpuToNVVMTypeConverter(typeConverter); + populateGpuToNVVMConversionPatterns(typeConverter, patterns); +} + +void mlir::NVVM::registerConvertGpuToNVVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, NVVMDialect *dialect) { + NVVMTargetAttr::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index ce91424e7a577..59b0f5c9b09bc 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -153,6 +153,12 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, type.isVarArg()); }); + // Helper function that checks if the given value range is a bare pointer. 
+ auto isBarePointer = [](ValueRange values) { + return values.size() == 1 && + isa(values.front().getType()); + }; + // Argument materializations convert from the new block argument types // (multiple SSA values that make up a memref descriptor) back to the // original block argument type. The dialect conversion framework will then @@ -161,11 +167,10 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, UnrankedMemRefType resultType, ValueRange inputs, Location loc) { - if (inputs.size() == 1) { - // Bare pointers are not supported for unranked memrefs because a - // memref descriptor cannot be built just from a bare pointer. + // Note: Bare pointers are not supported for unranked memrefs because a + // memref descriptor cannot be built just from a bare pointer. + if (TypeRange(inputs) != getUnrankedMemRefDescriptorFields()) return Value(); - } Value desc = UnrankedMemRefDescriptor::pack(builder, loc, *this, resultType, inputs); // An argument materialization must return a value of type @@ -177,20 +182,17 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, MemRefType resultType, ValueRange inputs, Location loc) { Value desc; - if (inputs.size() == 1) { - // This is a bare pointer. We allow bare pointers only for function entry - // blocks. - BlockArgument barePtr = dyn_cast(inputs.front()); - if (!barePtr) - return Value(); - Block *block = barePtr.getOwner(); - if (!block->isEntryBlock() || - !isa(block->getParentOp())) - return Value(); + if (isBarePointer(inputs)) { desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType, inputs[0]); - } else { + } else if (TypeRange(inputs) == + getMemRefDescriptorFields(resultType, + /*unpackAggregates=*/true)) { desc = MemRefDescriptor::pack(builder, loc, *this, resultType, inputs); + } else { + // The inputs are neither a bare pointer nor an unpacked memref + // descriptor. 
This materialization function cannot be used. + return Value(); } // An argument materialization must return a value of type `resultType`, // so insert a cast from the memref descriptor type (!llvm.struct) to the diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index fe1c47b405190..113bb85a6530e 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -4739,12 +4739,98 @@ struct CancelDelinearizeOfLinearizeDisjointExactTail return success(); } }; + +/// If the input to a delinearization is a disjoint linearization, and the +/// last k > 1 components of the delinearization basis multiply to the +/// last component of the linearization basis, break the linearization and +/// delinearization into two parts, peeling off the last input to linearization. +/// +/// For example: +/// %0 = affine.linearize_index [%z, %y, %x] by (3, 2, 32) : index +/// %1:4 = affine.delinearize_index %0 by (2, 3, 8, 4) : index, ... 
+/// becomes +/// %0 = affine.linearize_index [%z, %y] by (3, 2) : index +/// %1:2 = affine.delinearize_index %0 by (2, 3) : index +/// %2:2 = affine.delinearize_index %x by (8, 4) : index +/// where the original %1:4 is replaced by %1:2 ++ %2:2 +struct SplitDelinearizeSpanningLastLinearizeArg final + : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(affine::AffineDelinearizeIndexOp delinearizeOp, + PatternRewriter &rewriter) const override { + auto linearizeOp = delinearizeOp.getLinearIndex() + .getDefiningOp(); + if (!linearizeOp) + return rewriter.notifyMatchFailure(delinearizeOp, + "index doesn't come from linearize"); + + if (!linearizeOp.getDisjoint()) + return rewriter.notifyMatchFailure(linearizeOp, + "linearize isn't disjoint"); + + int64_t target = linearizeOp.getStaticBasis().back(); + if (ShapedType::isDynamic(target)) + return rewriter.notifyMatchFailure( + linearizeOp, "linearize ends with dynamic basis value"); + + int64_t sizeToSplit = 1; + size_t elemsToSplit = 0; + ArrayRef basis = delinearizeOp.getStaticBasis(); + for (int64_t basisElem : llvm::reverse(basis)) { + if (ShapedType::isDynamic(basisElem)) + return rewriter.notifyMatchFailure( + delinearizeOp, "dynamic basis element while scanning for split"); + sizeToSplit *= basisElem; + elemsToSplit += 1; + + if (sizeToSplit > target) + return rewriter.notifyMatchFailure(delinearizeOp, + "overshot last argument size"); + if (sizeToSplit == target) + break; + } + + if (sizeToSplit < target) + return rewriter.notifyMatchFailure( + delinearizeOp, "product of known basis elements doesn't exceed last " + "linearize argument"); + + if (elemsToSplit < 2) + return rewriter.notifyMatchFailure( + delinearizeOp, + "need at least two elements to form the basis product"); + + Value linearizeWithoutBack = + rewriter.create( + linearizeOp.getLoc(), linearizeOp.getMultiIndex().drop_back(), + linearizeOp.getDynamicBasis(), + linearizeOp.getStaticBasis().drop_back(), 
+ linearizeOp.getDisjoint()); + auto delinearizeWithoutSplitPart = + rewriter.create( + delinearizeOp.getLoc(), linearizeWithoutBack, + delinearizeOp.getDynamicBasis(), basis.drop_back(elemsToSplit), + delinearizeOp.hasOuterBound()); + auto delinearizeBack = rewriter.create( + delinearizeOp.getLoc(), linearizeOp.getMultiIndex().back(), + basis.take_back(elemsToSplit), /*hasOuterBound=*/true); + SmallVector results = llvm::to_vector( + llvm::concat(delinearizeWithoutSplitPart.getResults(), + delinearizeBack.getResults())); + rewriter.replaceOp(delinearizeOp, results); + + return success(); + } +}; } // namespace void affine::AffineDelinearizeIndexOp::getCanonicalizationPatterns( RewritePatternSet &patterns, MLIRContext *context) { - patterns.insert(context); + patterns + .insert( + context); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp index fc31931da0607..e3f316443161f 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp @@ -12,25 +12,9 @@ #include "mlir/Dialect/Affine/Passes.h" -#include "mlir/Analysis/SliceAnalysis.h" -#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" -#include "mlir/Dialect/Affine/Analysis/AffineStructures.h" -#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/Analysis/Utils.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Affine/LoopUtils.h" -#include "mlir/Dialect/Affine/Utils.h" -#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/IR/AffineExpr.h" -#include "mlir/IR/AffineMap.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/Matchers.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -#include "llvm/ADT/DenseMap.h" -#include 
"llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -41,7 +25,7 @@ namespace affine { } // namespace affine } // namespace mlir -#define DEBUG_TYPE "licm" +#define DEBUG_TYPE "affine-licm" using namespace mlir; using namespace mlir::affine; @@ -49,9 +33,13 @@ using namespace mlir::affine; namespace { /// Affine loop invariant code motion (LICM) pass. -/// TODO: The pass is missing zero-trip tests. -/// TODO: This code should be removed once the new LICM pass can handle its -/// uses. +/// TODO: The pass is missing zero tripcount tests. +/// TODO: When compared to the other standard LICM pass, this pass +/// has some special handling for affine read/write ops but such handling +/// requires aliasing to be sound, and as such this pass is unsound. In +/// addition, this handling is nothing particular to affine memory ops but would +/// apply to any memory read/write effect ops. Either aliasing should be handled +/// or this pass can be removed and the standard LICM can be used. 
struct LoopInvariantCodeMotion : public affine::impl::AffineLoopInvariantCodeMotionBase< LoopInvariantCodeMotion> { @@ -61,100 +49,80 @@ struct LoopInvariantCodeMotion } // namespace static bool -checkInvarianceOfNestedIfOps(AffineIfOp ifOp, Value indVar, ValueRange iterArgs, +checkInvarianceOfNestedIfOps(AffineIfOp ifOp, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist); -static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs, +static bool isOpLoopInvariant(Operation &op, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist); static bool -areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, - ValueRange iterArgs, +areAllOpsInTheBlockListInvariant(Region &blockList, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist); -// Returns true if the individual op is loop invariant. -static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs, +/// Returns true if `op` is invariant on `loop`. 
+static bool isOpLoopInvariant(Operation &op, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist) { - LLVM_DEBUG(llvm::dbgs() << "iterating on op: " << op;); + Value iv = loop.getInductionVar(); if (auto ifOp = dyn_cast(op)) { - if (!checkInvarianceOfNestedIfOps(ifOp, indVar, iterArgs, opsWithUsers, - opsToHoist)) + if (!checkInvarianceOfNestedIfOps(ifOp, loop, opsWithUsers, opsToHoist)) return false; } else if (auto forOp = dyn_cast(op)) { - if (!areAllOpsInTheBlockListInvariant(forOp.getRegion(), indVar, iterArgs, - opsWithUsers, opsToHoist)) + if (!areAllOpsInTheBlockListInvariant(forOp.getRegion(), loop, opsWithUsers, + opsToHoist)) return false; } else if (auto parOp = dyn_cast(op)) { - if (!areAllOpsInTheBlockListInvariant(parOp.getRegion(), indVar, iterArgs, - opsWithUsers, opsToHoist)) + if (!areAllOpsInTheBlockListInvariant(parOp.getRegion(), loop, opsWithUsers, + opsToHoist)) return false; } else if (!isMemoryEffectFree(&op) && - !isa(&op)) { + !isa(&op)) { // Check for side-effecting ops. Affine read/write ops are handled // separately below. return false; - } else if (!matchPattern(&op, m_Constant())) { + } else if (isa(op)) { // Register op in the set of ops that have users. opsWithUsers.insert(&op); - if (isa(op)) { - auto read = dyn_cast(op); - Value memref = read ? read.getMemRef() - : cast(op).getMemRef(); - for (auto *user : memref.getUsers()) { - // If this memref has a user that is a DMA, give up because these - // operations write to this memref. - if (isa(user)) + SmallVector userIVs; + auto read = dyn_cast(op); + Value memref = + read ? read.getMemRef() : cast(op).getMemRef(); + for (auto *user : memref.getUsers()) { + // If the memref used by the load/store is used in a store elsewhere in + // the loop nest, we do not hoist. Similarly, if the memref used in a + // load is also being stored too, we do not hoist the load. + // FIXME: This is missing checking aliases. 
+ if (&op == user) + continue; + if (hasEffect(user, memref) || + (hasEffect(user, memref) && + isa(op))) { + userIVs.clear(); + getAffineForIVs(*user, &userIVs); + // Check that userIVs don't contain the for loop around the op. + if (llvm::is_contained(userIVs, loop)) return false; - // If the memref used by the load/store is used in a store elsewhere in - // the loop nest, we do not hoist. Similarly, if the memref used in a - // load is also being stored too, we do not hoist the load. - if (isa(user) || - (isa(user) && - isa(op))) { - if (&op != user) { - SmallVector userIVs; - getAffineForIVs(*user, &userIVs); - // Check that userIVs don't contain the for loop around the op. - if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) - return false; - } - } } } - - if (op.getNumOperands() == 0 && !isa(op)) { - LLVM_DEBUG(llvm::dbgs() << "Non-constant op with 0 operands\n"); - return false; - } } // Check operands. + ValueRange iterArgs = loop.getRegionIterArgs(); for (unsigned int i = 0; i < op.getNumOperands(); ++i) { auto *operandSrc = op.getOperand(i).getDefiningOp(); - LLVM_DEBUG( - op.getOperand(i).print(llvm::dbgs() << "Iterating on operand\n")); - // If the loop IV is the operand, this op isn't loop invariant. - if (indVar == op.getOperand(i)) { - LLVM_DEBUG(llvm::dbgs() << "Loop IV is the operand\n"); + if (iv == op.getOperand(i)) return false; - } // If the one of the iter_args is the operand, this op isn't loop invariant. - if (llvm::is_contained(iterArgs, op.getOperand(i))) { - LLVM_DEBUG(llvm::dbgs() << "One of the iter_args is the operand\n"); + if (llvm::is_contained(iterArgs, op.getOperand(i))) return false; - } if (operandSrc) { - LLVM_DEBUG(llvm::dbgs() << *operandSrc << "Iterating on operand src\n"); - // If the value was defined in the loop (outside of the if/else region), // and that operation itself wasn't meant to be hoisted, then mark this // operation loop dependent. 
@@ -170,14 +138,13 @@ static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs, // Checks if all ops in a region (i.e. list of blocks) are loop invariant. static bool -areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, - ValueRange iterArgs, +areAllOpsInTheBlockListInvariant(Region &blockList, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist) { for (auto &b : blockList) { for (auto &op : b) { - if (!isOpLoopInvariant(op, indVar, iterArgs, opsWithUsers, opsToHoist)) + if (!isOpLoopInvariant(op, loop, opsWithUsers, opsToHoist)) return false; } } @@ -187,14 +154,14 @@ areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, // Returns true if the affine.if op can be hoisted. static bool -checkInvarianceOfNestedIfOps(AffineIfOp ifOp, Value indVar, ValueRange iterArgs, +checkInvarianceOfNestedIfOps(AffineIfOp ifOp, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist) { - if (!areAllOpsInTheBlockListInvariant(ifOp.getThenRegion(), indVar, iterArgs, + if (!areAllOpsInTheBlockListInvariant(ifOp.getThenRegion(), loop, opsWithUsers, opsToHoist)) return false; - if (!areAllOpsInTheBlockListInvariant(ifOp.getElseRegion(), indVar, iterArgs, + if (!areAllOpsInTheBlockListInvariant(ifOp.getElseRegion(), loop, opsWithUsers, opsToHoist)) return false; @@ -202,10 +169,6 @@ checkInvarianceOfNestedIfOps(AffineIfOp ifOp, Value indVar, ValueRange iterArgs, } void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) { - auto *loopBody = forOp.getBody(); - auto indVar = forOp.getInductionVar(); - ValueRange iterArgs = forOp.getRegionIterArgs(); - // This is the place where hoisted instructions would reside. 
OpBuilder b(forOp.getOperation()); @@ -213,14 +176,14 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) { SmallVector opsToMove; SmallPtrSet opsWithUsers; - for (auto &op : *loopBody) { + for (Operation &op : *forOp.getBody()) { // Register op in the set of ops that have users. This set is used // to prevent hoisting ops that depend on these ops that are // not being hoisted. if (!op.use_empty()) opsWithUsers.insert(&op); if (!isa(op)) { - if (isOpLoopInvariant(op, indVar, iterArgs, opsWithUsers, opsToHoist)) { + if (isOpLoopInvariant(op, forOp, opsWithUsers, opsToHoist)) { opsToMove.push_back(&op); } } @@ -231,18 +194,13 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) { for (auto *op : opsToMove) { op->moveBefore(forOp); } - - LLVM_DEBUG(forOp->print(llvm::dbgs() << "Modified loop\n")); } void LoopInvariantCodeMotion::runOnOperation() { // Walk through all loops in a function in innermost-loop-first order. This // way, we first LICM from the inner loop, and place the ops in // the outer loop, which in turn can be further LICM'ed. 
- getOperation().walk([&](AffineForOp op) { - LLVM_DEBUG(op->print(llvm::dbgs() << "\nOriginal loop\n")); - runOnAffineForOp(op); - }); + getOperation().walk([&](AffineForOp op) { runOnAffineForOp(op); }); } std::unique_ptr> diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index e75d1c571d08c..c5cc8bfeb0a64 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -129,8 +129,13 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) { auto *parentBlock = forOp->getBlock(); if (!iv.use_empty()) { if (forOp.hasConstantLowerBound()) { - OpBuilder topBuilder(forOp->getParentOfType().getBody()); - auto constOp = topBuilder.create( + auto func = forOp->getParentOfType(); + OpBuilder builder(forOp->getContext()); + if (func) + builder.setInsertionPointToStart(&func.getFunctionBody().front()); + else + builder.setInsertionPoint(forOp); + auto constOp = builder.create( forOp.getLoc(), forOp.getConstantLowerBound()); iv.replaceAllUsesWith(constOp); } else { @@ -1936,8 +1941,8 @@ static LogicalResult generateCopy( *nBegin = begin; *nEnd = end; - func::FuncOp f = begin->getParentOfType(); - OpBuilder topBuilder(f.getBody()); + auto f = begin->getParentOfType(); + OpBuilder topBuilder(f.getFunctionBody()); Value zeroIndex = topBuilder.create(f.getLoc(), 0); *sizeInBytes = 0; @@ -1956,8 +1961,9 @@ static LogicalResult generateCopy( OpBuilder &b = region.isWrite() ? epilogue : prologue; // Builder to create constants at the top level. - auto func = copyPlacementBlock->getParent()->getParentOfType(); - OpBuilder top(func.getBody()); + auto func = + copyPlacementBlock->getParent()->getParentOfType(); + OpBuilder top(func.getFunctionBody()); auto loc = region.loc; auto memref = region.memref; @@ -2298,21 +2304,26 @@ mlir::affine::affineDataCopyGenerate(Block::iterator begin, Block::iterator end, // Walk this range of operations to gather all memory regions. 
block->walk(begin, end, [&](Operation *opInst) { + Value memref; + MemRefType memrefType; // Gather regions to allocate to buffers in faster memory space. if (auto loadOp = dyn_cast(opInst)) { - if ((filterMemRef.has_value() && filterMemRef != loadOp.getMemRef()) || - (loadOp.getMemRefType().getMemorySpaceAsInt() != - copyOptions.slowMemorySpace)) - return; + memref = loadOp.getMemRef(); + memrefType = loadOp.getMemRefType(); } else if (auto storeOp = dyn_cast(opInst)) { - if ((filterMemRef.has_value() && filterMemRef != storeOp.getMemRef()) || - storeOp.getMemRefType().getMemorySpaceAsInt() != - copyOptions.slowMemorySpace) - return; - } else { - // Neither load nor a store op. - return; + memref = storeOp.getMemRef(); + memrefType = storeOp.getMemRefType(); } + // Neither load nor a store op. + if (!memref) + return; + + auto memorySpaceAttr = + dyn_cast_or_null(memrefType.getMemorySpace()); + if ((filterMemRef.has_value() && filterMemRef != memref) || + (memorySpaceAttr && + memrefType.getMemorySpaceAsInt() != copyOptions.slowMemorySpace)) + return; // Compute the MemRefRegion accessed. auto region = std::make_unique(opInst->getLoc()); diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 6bab289859e87..a01776dd6ddf5 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -1391,11 +1391,11 @@ LogicalResult mlir::affine::replaceAllMemRefUsesWith( std::unique_ptr postDomInfo; if (domOpFilter) domInfo = std::make_unique( - domOpFilter->getParentOfType()); + domOpFilter->getParentOfType()); if (postDomOpFilter) postDomInfo = std::make_unique( - postDomOpFilter->getParentOfType()); + postDomOpFilter->getParentOfType()); // Walk all uses of old memref; collect ops to perform replacement. 
We use a // DenseSet since an operation could potentially have multiple uses of a diff --git a/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp b/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp index eb444d665ff26..b1cde6ca5d2fc 100644 --- a/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp +++ b/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp @@ -23,21 +23,34 @@ struct CallOpSignatureConversion : public OpConversionPattern { LogicalResult matchAndRewrite(CallOp callOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - // Convert the original function results. + // Convert the original function results. Keep track of how many result + // types an original result type is converted into. + SmallVector numResultsReplacments; SmallVector convertedResults; - if (failed(typeConverter->convertTypes(callOp.getResultTypes(), - convertedResults))) - return failure(); - - // If this isn't a one-to-one type mapping, we don't know how to aggregate - // the results. - if (callOp->getNumResults() != convertedResults.size()) - return failure(); + size_t numFlattenedResults = 0; + for (auto [idx, type] : llvm::enumerate(callOp.getResultTypes())) { + if (failed(typeConverter->convertTypes(type, convertedResults))) + return failure(); + numResultsReplacments.push_back(convertedResults.size() - + numFlattenedResults); + numFlattenedResults = convertedResults.size(); + } // Substitute with the new result types from the corresponding FuncType // conversion. 
- rewriter.replaceOpWithNewOp( - callOp, callOp.getCallee(), convertedResults, adaptor.getOperands()); + auto newCallOp = + rewriter.create(callOp.getLoc(), callOp.getCallee(), + convertedResults, adaptor.getOperands()); + SmallVector replacements; + size_t offset = 0; + for (int i = 0, e = callOp->getNumResults(); i < e; ++i) { + replacements.push_back( + newCallOp->getResults().slice(offset, numResultsReplacments[i])); + offset += numResultsReplacments[i]; + } + assert(offset == convertedResults.size() && + "expected that all converted results are used"); + rewriter.replaceOpWithMultiple(callOp, replacements); return success(); } }; diff --git a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp index 1adc381092bf3..0ffd8131b8934 100644 --- a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp @@ -132,6 +132,29 @@ collectEffects(Operation *op, return false; } +/// Get all effects before the given operation caused by other operations in the +/// same block. That is, this will not consider operations beyond the block. +static bool +getEffectsBeforeInBlock(Operation *op, + SmallVectorImpl &effects, + bool stopAtBarrier) { + if (op == &op->getBlock()->front()) + return true; + + for (Operation *it = op->getPrevNode(); it != nullptr; + it = it->getPrevNode()) { + if (isa(it)) { + if (stopAtBarrier) + return true; + continue; + } + + if (!collectEffects(it, effects)) + return false; + } + return true; +} + /// Collects memory effects from operations that may be executed before `op` in /// a trivial structured control flow, e.g., without branches. Stops at the /// parallel region boundary or at the barrier operation if `stopAtBarrier` is @@ -153,19 +176,7 @@ getEffectsBefore(Operation *op, } // Collect all effects before the op. 
- if (op != &op->getBlock()->front()) { - for (Operation *it = op->getPrevNode(); it != nullptr; - it = it->getPrevNode()) { - if (isa(it)) { - if (stopAtBarrier) - return true; - else - continue; - } - if (!collectEffects(it, effects)) - return false; - } - } + getEffectsBeforeInBlock(op, effects, stopAtBarrier); // Stop if reached the parallel region boundary. if (isParallelRegionBoundary(op->getParentOp())) @@ -191,8 +202,8 @@ getEffectsBefore(Operation *op, // appropriately. if (isSequentialLoopLike(op->getParentOp())) { // Assuming loop terminators have no side effects. - return getEffectsBefore(op->getBlock()->getTerminator(), effects, - /*stopAtBarrier=*/true); + return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects, + /*stopAtBarrier=*/true); } // If the parent operation is not guaranteed to execute its (single-block) @@ -212,6 +223,28 @@ getEffectsBefore(Operation *op, return !conservative; } +/// Get all effects after the given operation caused by other operations in the +/// same block. That is, this will not consider operations beyond the block. +static bool +getEffectsAfterInBlock(Operation *op, + SmallVectorImpl &effects, + bool stopAtBarrier) { + if (op == &op->getBlock()->back()) + return true; + + for (Operation *it = op->getNextNode(); it != nullptr; + it = it->getNextNode()) { + if (isa(it)) { + if (stopAtBarrier) + return true; + continue; + } + if (!collectEffects(it, effects)) + return false; + } + return true; +} + /// Collects memory effects from operations that may be executed after `op` in /// a trivial structured control flow, e.g., without branches. Stops at the /// parallel region boundary or at the barrier operation if `stopAtBarrier` is @@ -233,17 +266,7 @@ getEffectsAfter(Operation *op, } // Collect all effects after the op. 
- if (op != &op->getBlock()->back()) - for (Operation *it = op->getNextNode(); it != nullptr; - it = it->getNextNode()) { - if (isa(it)) { - if (stopAtBarrier) - return true; - continue; - } - if (!collectEffects(it, effects)) - return false; - } + getEffectsAfterInBlock(op, effects, stopAtBarrier); // Stop if reached the parallel region boundary. if (isParallelRegionBoundary(op->getParentOp())) @@ -272,8 +295,8 @@ getEffectsAfter(Operation *op, return true; bool exact = collectEffects(&op->getBlock()->front(), effects); - return getEffectsAfter(&op->getBlock()->front(), effects, - /*stopAtBarrier=*/true) && + return getEffectsAfterInBlock(&op->getBlock()->front(), effects, + /*stopAtBarrier=*/true) && exact; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 9bb0c80749a5f..d30a6b8398f06 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -3510,8 +3510,7 @@ void LLVMDialect::initialize() { LLVMPPCFP128Type, LLVMTokenType, LLVMLabelType, - LLVMMetadataType, - LLVMStructType>(); + LLVMMetadataType>(); // clang-format on registerTypes(); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 903035a3ec229..655316cc5d66d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -1566,7 +1566,7 @@ DeletionKind LLVM::MemmoveOp::rewire(const DestructurableMemorySlot &slot, //===----------------------------------------------------------------------===// std::optional> -LLVM::LLVMStructType::getSubelementIndexMap() { +LLVM::LLVMStructType::getSubelementIndexMap() const { Type i32 = IntegerType::get(getContext(), 32); DenseMap destructured; for (const auto &[index, elemType] : llvm::enumerate(getBody())) @@ -1574,7 +1574,7 @@ LLVM::LLVMStructType::getSubelementIndexMap() { return destructured; } -Type LLVM::LLVMStructType::getTypeAtIndex(Attribute 
index) { +Type LLVM::LLVMStructType::getTypeAtIndex(Attribute index) const { auto indexAttr = llvm::dyn_cast(index); if (!indexAttr || !indexAttr.getType().isInteger(32)) return {}; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index 1bed3fa48b30d..33c231e2d2045 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -485,7 +485,7 @@ bool LLVMStructType::isOpaque() { (getImpl()->isOpaque() || !getImpl()->isInitialized()); } bool LLVMStructType::isInitialized() { return getImpl()->isInitialized(); } -StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); } +StringRef LLVMStructType::getName() const { return getImpl()->getIdentifier(); } ArrayRef LLVMStructType::getBody() const { return isIdentified() ? getImpl()->getIdentifiedStructBody() : getImpl()->getTypeList(); diff --git a/mlir/lib/Dialect/SPIRV/IR/MemoryOps.cpp b/mlir/lib/Dialect/SPIRV/IR/MemoryOps.cpp index 154e955d6057a..5ae27e5d82bd7 100644 --- a/mlir/lib/Dialect/SPIRV/IR/MemoryOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/MemoryOps.cpp @@ -543,56 +543,6 @@ LogicalResult CopyMemoryOp::verify() { return verifySourceMemoryAccessAttribute(*this); } -static ParseResult parsePtrAccessChainOpImpl(StringRef opName, - OpAsmParser &parser, - OperationState &state) { - OpAsmParser::UnresolvedOperand ptrInfo; - SmallVector indicesInfo; - Type type; - auto loc = parser.getCurrentLocation(); - SmallVector indicesTypes; - - if (parser.parseOperand(ptrInfo) || - parser.parseOperandList(indicesInfo, OpAsmParser::Delimiter::Square) || - parser.parseColonType(type) || - parser.resolveOperand(ptrInfo, type, state.operands)) - return failure(); - - // Check that the provided indices list is not empty before parsing their - // type list. 
- if (indicesInfo.empty()) - return emitError(state.location) << opName << " expected element"; - - if (parser.parseComma() || parser.parseTypeList(indicesTypes)) - return failure(); - - // Check that the indices types list is not empty and that it has a one-to-one - // mapping to the provided indices. - if (indicesTypes.size() != indicesInfo.size()) - return emitError(state.location) - << opName - << " indices types' count must be equal to indices info count"; - - if (parser.resolveOperands(indicesInfo, indicesTypes, loc, state.operands)) - return failure(); - - auto resultType = getElementPtrType( - type, llvm::ArrayRef(state.operands).drop_front(2), state.location); - if (!resultType) - return failure(); - - state.addTypes(resultType); - return success(); -} - -template -static auto concatElemAndIndices(Op op) { - SmallVector ret(op.getIndices().size() + 1); - ret[0] = op.getElement(); - llvm::copy(op.getIndices(), ret.begin() + 1); - return ret; -} - //===----------------------------------------------------------------------===// // spirv.InBoundsPtrAccessChainOp //===----------------------------------------------------------------------===// @@ -605,16 +555,6 @@ void InBoundsPtrAccessChainOp::build(OpBuilder &builder, OperationState &state, build(builder, state, type, basePtr, element, indices); } -ParseResult InBoundsPtrAccessChainOp::parse(OpAsmParser &parser, - OperationState &result) { - return parsePtrAccessChainOpImpl( - spirv::InBoundsPtrAccessChainOp::getOperationName(), parser, result); -} - -void InBoundsPtrAccessChainOp::print(OpAsmPrinter &printer) { - printAccessChain(*this, concatElemAndIndices(*this), printer); -} - LogicalResult InBoundsPtrAccessChainOp::verify() { return verifyAccessChain(*this, getIndices()); } @@ -630,16 +570,6 @@ void PtrAccessChainOp::build(OpBuilder &builder, OperationState &state, build(builder, state, type, basePtr, element, indices); } -ParseResult PtrAccessChainOp::parse(OpAsmParser &parser, - OperationState &result) 
{ - return parsePtrAccessChainOpImpl(spirv::PtrAccessChainOp::getOperationName(), - parser, result); -} - -void PtrAccessChainOp::print(OpAsmPrinter &printer) { - printAccessChain(*this, concatElemAndIndices(*this), printer); -} - LogicalResult PtrAccessChainOp::verify() { return verifyAccessChain(*this, getIndices()); } diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 51fe49a3c0bf8..5e8907eb2d4a3 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -2040,12 +2040,23 @@ void AsmPrinter::Impl::printLocationInternal(LocationAttr loc, bool pretty, else os << "unknown"; }) - .Case([&](FileLineColLoc loc) { + .Case([&](FileLineColRange loc) { if (pretty) os << loc.getFilename().getValue(); else printEscapedString(loc.getFilename()); - os << ':' << loc.getLine() << ':' << loc.getColumn(); + if (loc.getEndColumn() == loc.getStartColumn() && + loc.getStartLine() == loc.getEndLine()) { + os << ':' << loc.getStartLine() << ':' << loc.getStartColumn(); + return; + } + if (loc.getStartLine() == loc.getEndLine()) { + os << ':' << loc.getStartLine() << ':' << loc.getStartColumn() + << " to :" << loc.getEndColumn(); + return; + } + os << ':' << loc.getStartLine() << ':' << loc.getStartColumn() << " to " + << loc.getEndLine() << ':' << loc.getEndColumn(); }) .Case([&](NameLoc loc) { printEscapedString(loc.getName()); diff --git a/mlir/lib/IR/BuiltinDialectBytecode.cpp b/mlir/lib/IR/BuiltinDialectBytecode.cpp index 6131b7eae90c8..6095c6bcb2ce3 100644 --- a/mlir/lib/IR/BuiltinDialectBytecode.cpp +++ b/mlir/lib/IR/BuiltinDialectBytecode.cpp @@ -14,7 +14,10 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/DialectResourceBlobManager.h" +#include "mlir/IR/Location.h" +#include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include using namespace mlir; @@ -70,8 +73,8 @@ readPotentiallySplatString(DialectBytecodeReader &reader, ShapedType type, return success(); } -void 
writePotentiallySplatString(DialectBytecodeWriter &writer, - DenseStringElementsAttr attr) { +static void writePotentiallySplatString(DialectBytecodeWriter &writer, + DenseStringElementsAttr attr) { bool isSplat = attr.isSplat(); if (isSplat) return writer.writeOwnedString(attr.getRawStringData().front()); @@ -80,6 +83,70 @@ void writePotentiallySplatString(DialectBytecodeWriter &writer, writer.writeOwnedString(str); } +static FileLineColRange getFileLineColRange(MLIRContext *context, + StringAttr filename, + ArrayRef lineCols) { + switch (lineCols.size()) { + case 0: + return FileLineColRange::get(filename); + case 1: + return FileLineColRange::get(filename, lineCols[0]); + case 2: + return FileLineColRange::get(filename, lineCols[0], lineCols[1]); + case 3: + return FileLineColRange::get(filename, lineCols[0], lineCols[1], + lineCols[2]); + case 4: + return FileLineColRange::get(filename, lineCols[0], lineCols[1], + lineCols[2], lineCols[3]); + default: + return {}; + } +} + +static LogicalResult +readFileLineColRangeLocs(DialectBytecodeReader &reader, + SmallVectorImpl &lineCols) { + return reader.readList( + lineCols, [&reader](uint64_t &val) { return reader.readVarInt(val); }); +} + +static void writeFileLineColRangeLocs(DialectBytecodeWriter &writer, + FileLineColRange range) { + if (range.getStartLine() == 0 && range.getStartColumn() == 0 && + range.getEndLine() == 0 && range.getEndColumn() == 0) { + writer.writeVarInt(0); + return; + } + if (range.getStartColumn() == 0 && + range.getStartLine() == range.getEndLine()) { + writer.writeVarInt(1); + writer.writeVarInt(range.getStartLine()); + return; + } + // The single file:line:col is handled by other writer, but checked here for + // completeness. 
+ if (range.getEndColumn() == range.getStartColumn() && + range.getStartLine() == range.getEndLine()) { + writer.writeVarInt(2); + writer.writeVarInt(range.getStartLine()); + writer.writeVarInt(range.getStartColumn()); + return; + } + if (range.getStartLine() == range.getEndLine()) { + writer.writeVarInt(3); + writer.writeVarInt(range.getStartLine()); + writer.writeVarInt(range.getStartColumn()); + writer.writeVarInt(range.getEndColumn()); + return; + } + writer.writeVarInt(4); + writer.writeVarInt(range.getStartLine()); + writer.writeVarInt(range.getStartColumn()); + writer.writeVarInt(range.getEndLine()); + writer.writeVarInt(range.getEndColumn()); +} + #include "mlir/IR/BuiltinDialectBytecode.cpp.inc" /// This class implements the bytecode interface for the builtin dialect. diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp index dbd84912a8657..ce78d30ee0a52 100644 --- a/mlir/lib/IR/Location.cpp +++ b/mlir/lib/IR/Location.cpp @@ -7,31 +7,118 @@ //===----------------------------------------------------------------------===// #include "mlir/IR/Location.h" +#include "mlir/IR/AttributeSupport.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/Visitors.h" +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/TrailingObjects.h" +#include +#include +#include +#include +#include +#include using namespace mlir; using namespace mlir::detail; -//===----------------------------------------------------------------------===// -/// Tablegen Attribute Definitions -//===----------------------------------------------------------------------===// +namespace mlir::detail { +struct FileLineColRangeAttrStorage final + : public ::mlir::AttributeStorage, + 
public llvm::TrailingObjects { + using PointerPair = llvm::PointerIntPair; + using KeyTy = std::tuple>; -#define GET_ATTRDEF_CLASSES -#include "mlir/IR/BuiltinLocationAttributes.cpp.inc" + FileLineColRangeAttrStorage(StringAttr filename, int numLocs) + : filenameAndTrailing(filename, numLocs) {} + + static FileLineColRangeAttrStorage * + construct(::mlir::AttributeStorageAllocator &allocator, KeyTy &&tblgenKey) { + auto numInArray = std::get<1>(tblgenKey).size(); + // Note: Considered asserting that numInArray is at least 1, but this + // is not needed in memory or in printed form. This should very rarely be + // 0 here as that means a NamedLoc would have been more efficient. But this + // does allow for location with just a file, and also having the interface + // be more uniform. + auto locEnc = numInArray == 0 ? 1 : numInArray; + // Allocate a new storage instance. + auto byteSize = + FileLineColRangeAttrStorage::totalSizeToAlloc(locEnc - 1); + auto *rawMem = + allocator.allocate(byteSize, alignof(FileLineColRangeAttrStorage)); + auto *result = ::new (rawMem) FileLineColRangeAttrStorage( + std::move(std::get<0>(tblgenKey)), locEnc - 1); + if (numInArray > 0) { + result->startLine = std::get<1>(tblgenKey)[0]; + // Copy in the element types into the trailing storage. + std::uninitialized_copy(std::next(std::get<1>(tblgenKey).begin()), + std::get<1>(tblgenKey).end(), + result->getTrailingObjects()); + } + return result; + } + + // Return the number of held types. 
+ unsigned size() const { return filenameAndTrailing.getInt() + 1; } + + bool operator==(const KeyTy &tblgenKey) const { + return (filenameAndTrailing.getPointer() == std::get<0>(tblgenKey)) && + (size() == std::get<1>(tblgenKey).size()) && + (startLine == std::get<1>(tblgenKey)[0]) && + (ArrayRef{getTrailingObjects(), size() - 1} == + ArrayRef{std::get<1>(tblgenKey)}.drop_front()); + } + + unsigned getLineCols(unsigned index) const { + return getTrailingObjects()[index - 1]; + } + + unsigned getStartLine() const { return startLine; } + unsigned getStartColumn() const { + if (size() <= 1) + return 0; + return getLineCols(1); + } + unsigned getEndColumn() const { + if (size() <= 2) + return getStartColumn(); + return getLineCols(2); + } + unsigned getEndLine() const { + if (size() <= 3) + return getStartLine(); + return getLineCols(3); + } + + static ::llvm::hash_code hashKey(const KeyTy &tblgenKey) { + return ::llvm::hash_combine(std::get<0>(tblgenKey), std::get<1>(tblgenKey)); + } + + // Supports + // - 0 (file:line) + // - 1 (file:line:col) + // - 2 (file:line:start_col to file:line:end_col) and + // - 3 (file:start_line:start_col to file:end_line:end_col) + llvm::PointerIntPair filenameAndTrailing; + unsigned startLine = 0; +}; +} // namespace mlir::detail //===----------------------------------------------------------------------===// -// BuiltinDialect +/// Tablegen Attribute Definitions //===----------------------------------------------------------------------===// -void BuiltinDialect::registerLocationAttributes() { - addAttributes< -#define GET_ATTRDEF_LIST +#define GET_ATTRDEF_CLASSES #include "mlir/IR/BuiltinLocationAttributes.cpp.inc" - >(); -} //===----------------------------------------------------------------------===// // LocationAttr @@ -66,6 +153,59 @@ CallSiteLoc CallSiteLoc::get(Location name, ArrayRef frames) { return CallSiteLoc::get(name, caller); } +//===----------------------------------------------------------------------===// +// 
FileLineColLoc +//===----------------------------------------------------------------------===// + +FileLineColLoc FileLineColLoc::get(StringAttr filename, unsigned line, + unsigned column) { + return llvm::cast( + FileLineColRange::get(filename, line, column)); +} + +FileLineColLoc FileLineColLoc::get(MLIRContext *context, StringRef fileName, + unsigned line, unsigned column) { + return llvm::cast( + FileLineColRange::get(context, fileName, line, column)); +} + +StringAttr FileLineColLoc::getFilename() const { + return FileLineColRange::getFilename(); +} + +unsigned FileLineColLoc::getLine() const { return getStartLine(); } + +unsigned FileLineColLoc::getColumn() const { return getStartColumn(); } + +bool FileLineColLoc::classof(Attribute attr) { + // This could also have been for <= 2. But given this is matching previous + // behavior, it is left as is. + if (auto range = mlir::dyn_cast(attr)) + return range.getImpl()->size() == 2; + return false; +} + +//===----------------------------------------------------------------------===// +// FileLineColRange +//===----------------------------------------------------------------------===// + +StringAttr FileLineColRange::getFilename() const { + return getImpl()->filenameAndTrailing.getPointer(); +} + +unsigned FileLineColRange::getStartLine() const { + return getImpl()->getStartLine(); +} +unsigned FileLineColRange::getStartColumn() const { + return getImpl()->getStartColumn(); +} +unsigned FileLineColRange::getEndColumn() const { + return getImpl()->getEndColumn(); +} +unsigned FileLineColRange::getEndLine() const { + return getImpl()->getEndLine(); +} + //===----------------------------------------------------------------------===// // FusedLoc //===----------------------------------------------------------------------===// @@ -107,3 +247,14 @@ Location FusedLoc::get(ArrayRef locs, Attribute metadata, return Base::get(context, locs, metadata); } + 
+//===----------------------------------------------------------------------===// +// BuiltinDialect +//===----------------------------------------------------------------------===// + +void BuiltinDialect::registerLocationAttributes() { + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/IR/BuiltinLocationAttributes.cpp.inc" + >(); +} diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp index b82280dda8ba7..0aa9dcb36681b 100644 --- a/mlir/lib/Transforms/RemoveDeadValues.cpp +++ b/mlir/lib/Transforms/RemoveDeadValues.cpp @@ -577,10 +577,8 @@ void RemoveDeadValues::runOnOperation() { WalkResult acceptableIR = module->walk([&](Operation *op) { if (op == module) return WalkResult::advance(); - if (isa(op) || - (isa(op) && !isa(op))) { - op->emitError() << "cannot optimize an IR with " - "non-call symbol user ops or branch ops\n"; + if (isa(op)) { + op->emitError() << "cannot optimize an IR with branch ops\n"; return WalkResult::interrupt(); } return WalkResult::advance(); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 5acd095da8e38..710c976281dc3 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -434,23 +434,25 @@ class MoveBlockRewrite : public BlockRewrite { class BlockTypeConversionRewrite : public BlockRewrite { public: BlockTypeConversionRewrite(ConversionPatternRewriterImpl &rewriterImpl, - Block *block, Block *origBlock) - : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, block), - origBlock(origBlock) {} + Block *origBlock, Block *newBlock) + : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, origBlock), + newBlock(newBlock) {} static bool classof(const IRRewrite *rewrite) { return rewrite->getKind() == Kind::BlockTypeConversion; } - Block *getOrigBlock() const { return origBlock; } + Block *getOrigBlock() const { return block; } + + Block *getNewBlock() const { 
return newBlock; } void commit(RewriterBase &rewriter) override; void rollback() override; private: - /// The original block that was requested to have its signature converted. - Block *origBlock; + /// The new block that was created as part of this signature conversion. + Block *newBlock; }; /// Replacing a block argument. This rewrite is not immediately reflected in the @@ -721,6 +723,18 @@ static bool hasRewrite(R &&rewrites, Operation *op) { }); } +#ifndef NDEBUG +/// Return "true" if there is a block rewrite that matches the specified +/// rewrite type and block among the given rewrites. +template +static bool hasRewrite(R &&rewrites, Block *block) { + return any_of(std::forward(rewrites), [&](auto &rewrite) { + auto *rewriteTy = dyn_cast(rewrite.get()); + return rewriteTy && rewriteTy->getBlock() == block; + }); +} +#endif // NDEBUG + //===----------------------------------------------------------------------===// // ConversionPatternRewriterImpl //===----------------------------------------------------------------------===// @@ -966,12 +980,12 @@ void BlockTypeConversionRewrite::commit(RewriterBase &rewriter) { // block. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - for (Operation *op : block->getUsers()) + for (Operation *op : getNewBlock()->getUsers()) listener->notifyOperationModified(op); } void BlockTypeConversionRewrite::rollback() { - block->replaceAllUsesWith(origBlock); + getNewBlock()->replaceAllUsesWith(getOrigBlock()); } void ReplaceBlockArgRewrite::commit(RewriterBase &rewriter) { @@ -1223,6 +1237,9 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( ConversionPatternRewriter &rewriter, Block *block, const TypeConverter *converter, TypeConverter::SignatureConversion &signatureConversion) { + // A block cannot be converted multiple times. 
+ assert(!hasRewrite(rewrites, block) && + "block was already converted"); OpBuilder::InsertionGuard g(rewriter); // If no arguments are being changed or added, there is nothing to do. @@ -1308,7 +1325,7 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( appendRewrite(block, origArg, converter); } - appendRewrite(newBlock, block); + appendRewrite(/*origBlock=*/block, newBlock); // Erase the old block. (It is just unlinked for now and will be erased during // cleanup.) diff --git a/mlir/python/mlir/_mlir_libs/__init__.py b/mlir/python/mlir/_mlir_libs/__init__.py index 98dbbc6adf9ce..c5cb22c6dccb8 100644 --- a/mlir/python/mlir/_mlir_libs/__init__.py +++ b/mlir/python/mlir/_mlir_libs/__init__.py @@ -80,9 +80,16 @@ def _site_initialize(): logger = logging.getLogger(__name__) post_init_hooks = [] disable_multithreading = False + # This flag disables eagerly loading all dialects. Eagerly loading is often + # not the desired behavior (see + # https://github.com/llvm/llvm-project/issues/56037), and the logic is that + # if any module has this attribute set, then we don't load all (e.g., it's + # being used in a solution where the loading is controlled). 
+ disable_load_all_available_dialects = False def process_initializer_module(module_name): nonlocal disable_multithreading + nonlocal disable_load_all_available_dialects try: m = importlib.import_module(f".{module_name}", __name__) except ModuleNotFoundError: @@ -107,6 +114,8 @@ def process_initializer_module(module_name): if bool(m.disable_multithreading): logger.debug("Disabling multi-threading for context") disable_multithreading = True + if hasattr(m, "disable_load_all_available_dialects"): + disable_load_all_available_dialects = True return True # If _mlirRegisterEverything is built, then include it as an initializer @@ -130,10 +139,8 @@ def __init__(self, *args, **kwargs): hook(self) if not disable_multithreading: self.enable_multithreading(True) - # TODO: There is some debate about whether we should eagerly load - # all dialects. It is being done here in order to preserve existing - # behavior. See: https://github.com/llvm/llvm-project/issues/56037 - self.load_all_available_dialects() + if not disable_load_all_available_dialects: + self.load_all_available_dialects() if init_module: logger.debug( "Registering translations from initializer %r", init_module diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index eeaac27461b11..272d066831f92 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,4 +1,4 @@ numpy>=1.19.5, <=2.1.2 -pybind11>=2.9.0, <=2.13.6 +pybind11>=2.10.0, <=2.13.6 PyYAML>=5.4.0, <=6.0.1 ml_dtypes>=0.1.0, <=0.5.0 # provides several NumPy dtype extensions, including the bf16 diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir new file mode 100644 index 0000000000000..ed7fa6508d5ad --- /dev/null +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-opt %s --pass-pipeline="builtin.module(gpu.module(convert-to-llvm{dynamic=true}))" | FileCheck %s + +// CHECK-LABEL: gpu.module 
@nvvm_module +gpu.module @nvvm_module [#nvvm.target] { + // CHECK-LABEL: llvm.func @kernel_0() + func.func @kernel_0() -> index { + // CHECK: = nvvm.read.ptx.sreg.tid.x : i32 + // CHECK: = llvm.sext %{{.*}} : i32 to i64 + %tIdX = gpu.thread_id x + // CHECK: = nvvm.read.ptx.sreg.laneid range : i32 + // CHECK: = llvm.sext %{{.*}} : i32 to i64 + %laneId = gpu.lane_id + %sum = index.add %tIdX, %laneId + func.return %sum : index + } + +// CHECK-LABEL: llvm.func @kernel_1 +// CHECK: (%{{.*}}: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: i64) +// CHECK: attributes {gpu.kernel, gpu.known_block_size = array, nvvm.kernel, nvvm.maxntid = array} + gpu.func @kernel_1(%arg0 : memref>) kernel attributes {known_block_size = array} { + gpu.return + } +} + +// CHECK-LABEL: gpu.module @nvvm_module_2 +gpu.module @nvvm_module_2 { + // CHECK-LABEL: llvm.func @kernel_0() + func.func @kernel_0() -> index { + // CHECK: = gpu.thread_id x + %tIdX = gpu.thread_id x + // CHECK: = gpu.lane_id + %laneId = gpu.lane_id + %sum = index.add %tIdX, %laneId + func.return %sum : index + } + +// CHECK-LABEL: gpu.func @kernel_1 +// CHECK: (%{{.*}}: memref>) kernel attributes {known_block_size = array} + gpu.func @kernel_1(%arg0 : memref>) kernel attributes {known_block_size = array} { + gpu.return + } +} diff --git a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir new file mode 100644 index 0000000000000..0288aa11313c7 --- /dev/null +++ b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir @@ -0,0 +1,57 @@ +// RUN: mlir-opt %s -test-llvm-legalize-patterns -split-input-file + +// Test the argument materializer for ranked MemRef types. 
+ +// CHECK-LABEL: func @construct_ranked_memref_descriptor( +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-COUNT-7: llvm.insertvalue +// CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<5x4xf32> +func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) { + %0 = "test.direct_replacement"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64) -> (memref<5x4xf32>) + "test.legal_op"(%0) : (memref<5x4xf32>) -> () + return +} + +// ----- + +// The argument materializer for ranked MemRef types is called with incorrect +// input types. Make sure that the materializer is skipped and we do not +// generate invalid IR. + +// CHECK-LABEL: func @invalid_ranked_memref_descriptor( +// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %{{.*}} : i1 to memref<5x4xf32> +// CHECK: "test.legal_op"(%[[cast]]) +func.func @invalid_ranked_memref_descriptor(%arg0: i1) { + %0 = "test.direct_replacement"(%arg0) : (i1) -> (memref<5x4xf32>) + "test.legal_op"(%0) : (memref<5x4xf32>) -> () + return +} + +// ----- + +// Test the argument materializer for unranked MemRef types. + +// CHECK-LABEL: func @construct_unranked_memref_descriptor( +// CHECK: llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK-COUNT-2: llvm.insertvalue +// CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(i64, ptr)> to memref<*xf32> +func.func @construct_unranked_memref_descriptor(%arg0: i64, %arg1: !llvm.ptr) { + %0 = "test.direct_replacement"(%arg0, %arg1) : (i64, !llvm.ptr) -> (memref<*xf32>) + "test.legal_op"(%0) : (memref<*xf32>) -> () + return +} + +// ----- + +// The argument materializer for unranked MemRef types is called with incorrect +// input types. Make sure that the materializer is skipped and we do not +// generate invalid IR. 
+ +// CHECK-LABEL: func @invalid_unranked_memref_descriptor( +// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %{{.*}} : i1 to memref<*xf32> +// CHECK: "test.legal_op"(%[[cast]]) +func.func @invalid_unranked_memref_descriptor(%arg0: i1) { + %0 = "test.direct_replacement"(%arg0) : (i1) -> (memref<*xf32>) + "test.legal_op"(%0) : (memref<*xf32>) -> () + return +} diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index fe3b4a206e2b9..330cf92bafba4 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -333,3 +333,23 @@ func.func @index_elt_type(%arg0: memref<1x2x4x8xindex>) { // CHECK-NEXT: affine.for %{{.*}} = 0 to 8 return } + +#map = affine_map<(d0) -> (d0 + 1)> + +// CHECK-LABEL: func @arbitrary_memory_space +func.func @arbitrary_memory_space() { + %alloc = memref.alloc() : memref<256x8xi8, #spirv.storage_class> + affine.for %arg0 = 0 to 32 step 4 { + %0 = affine.apply #map(%arg0) + affine.for %arg1 = 0 to 8 step 2 { + %1 = affine.apply #map(%arg1) + affine.for %arg2 = 0 to 8 step 2 { + // CHECK: memref.alloc() : memref<1x7xi8> + %2 = affine.apply #map(%arg2) + %3 = affine.load %alloc[%0, %1] : memref<256x8xi8, #spirv.storage_class> + affine.store %3, %alloc[%0, %2] : memref<256x8xi8, #spirv.storage_class> + } + } + } + return +} diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir index c04d7d2053866..858b7d3ddf9f1 100644 --- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir @@ -855,15 +855,16 @@ func.func @affine_prefetch_invariant() { affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0, %i1] : memref<10x10xf32> + // A prefetch shouldn't be hoisted. 
affine.prefetch %0[%i0, %i0], write, locality<0>, data : memref<10x10xf32> } } // CHECK: memref.alloc() : memref<10x10xf32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { - // CHECK-NEXT: affine.prefetch // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { - // CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}} : memref<10x10xf32> + // CHECK-NEXT: affine.load %{{.*}}[%{{.*}} : memref<10x10xf32> + // CHECK-NEXT: affine.prefetch // CHECK-NEXT: } // CHECK-NEXT: } return diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir index 5384977151b47..d3f61f7e503f9 100644 --- a/mlir/test/Dialect/Affine/canonicalize.mlir +++ b/mlir/test/Dialect/Affine/canonicalize.mlir @@ -1795,6 +1795,72 @@ func.func @no_cancel_delinearize_linearize_different_basis(%arg0: index, %arg1: // ----- +// CHECK-LABEL: func @split_delinearize_spanning_final_part +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index) +// CHECK: %[[LIN:.+]] = affine.linearize_index disjoint [%[[ARG0]], %[[ARG1]]] by (2, 4) +// CHECK: %[[DELIN1:.+]]:2 = affine.delinearize_index %[[LIN]] into (2) +// CHECK: %[[DELIN2:.+]]:2 = affine.delinearize_index %[[ARG2]] into (8, 8) +// CHECK: return %[[DELIN1]]#0, %[[DELIN1]]#1, %[[DELIN2]]#0, %[[DELIN2]]#1 +func.func @split_delinearize_spanning_final_part(%arg0: index, %arg1: index, %arg2: index) -> (index, index, index, index) { + %0 = affine.linearize_index disjoint [%arg0, %arg1, %arg2] by (2, 4, 64) : index + %1:4 = affine.delinearize_index %0 into (2, 8, 8) + : index, index, index, index + return %1#0, %1#1, %1#2, %1#3 : index, index, index, index +} + +// ----- + +// CHECK-LABEL: func @split_delinearize_spanning_final_part_and_cancel +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index) +// CHECK: %[[DELIN:.+]]:2 = affine.delinearize_index %[[ARG2]] into (8, 8) +// 
CHECK: return %[[ARG0]], %[[ARG1]], %[[DELIN]]#0, %[[DELIN]]#1 +func.func @split_delinearize_spanning_final_part_and_cancel(%arg0: index, %arg1: index, %arg2: index) -> (index, index, index, index) { + %0 = affine.linearize_index disjoint [%arg0, %arg1, %arg2] by (2, 4, 64) : index + %1:4 = affine.delinearize_index %0 into (2, 4, 8, 8) + : index, index, index, index + return %1#0, %1#1, %1#2, %1#3 : index, index, index, index +} + +// ----- + +// The delinearize basis doesn't match the last basis element before +// overshooting it, don't simplify. +// CHECK-LABEL: func @dont_split_delinearize_overshooting_target +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index) +// CHECK: %[[LIN:.+]] = affine.linearize_index disjoint [%[[ARG0]], %[[ARG1]], %[[ARG2]]] by (2, 4, 64) +// CHECK: %[[DELIN:.+]]:4 = affine.delinearize_index %[[LIN]] into (2, 16, 8) +// CHECK: return %[[DELIN]]#0, %[[DELIN]]#1, %[[DELIN]]#2, %[[DELIN]]#3 +func.func @dont_split_delinearize_overshooting_target(%arg0: index, %arg1: index, %arg2: index) -> (index, index, index, index) { + %0 = affine.linearize_index disjoint [%arg0, %arg1, %arg2] by (2, 4, 64) : index + %1:4 = affine.delinearize_index %0 into (2, 16, 8) + : index, index, index, index + return %1#0, %1#1, %1#2, %1#3 : index, index, index, index +} + +// ----- + +// The delinearize basis doesn't fully multiply to the final basis element. 
+// CHECK-LABEL: func @dont_split_delinearize_undershooting_target +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index) +// CHECK: %[[LIN:.+]] = affine.linearize_index disjoint [%[[ARG0]], %[[ARG1]]] by (2, 64) +// CHECK: %[[DELIN:.+]]:3 = affine.delinearize_index %[[LIN]] into (4, 8) +// CHECK: return %[[DELIN]]#0, %[[DELIN]]#1 +func.func @dont_split_delinearize_undershooting_target(%arg0: index, %arg1: index) -> (index, index, index) { + %0 = affine.linearize_index disjoint [%arg0, %arg1] by (2, 64) : index + %1:3 = affine.delinearize_index %0 into (4, 8) + : index, index, index + return %1#0, %1#1, %1#2 : index, index, index +} + +// ----- + // CHECK-LABEL: @linearize_unit_basis_disjoint // CHECK-SAME: (%[[arg0:.+]]: index, %[[arg1:.+]]: index, %[[arg2:.+]]: index, %[[arg3:.+]]: index) // CHECK: %[[ret:.+]] = affine.linearize_index disjoint [%[[arg0]], %[[arg2]]] by (3, %[[arg3]]) : index diff --git a/mlir/test/Dialect/Affine/loop-fusion-4.mlir b/mlir/test/Dialect/Affine/loop-fusion-4.mlir index 3fc31ad0d77b8..f46ad0f5e4c23 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-4.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-4.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{fusion-maximal mode=sibling}))' -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(spirv.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=SPIRV // Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir. 
// Part II of fusion tests in mlir/test/Transforms/loop-fusion-2.mlir @@ -226,3 +227,61 @@ func.func @fuse_higher_dim_nest_into_lower_dim_nest() { // PRODUCER-CONSUMER: return return } + +// ----- + +// Basic test to ensure fusion works inside other func ops like spirv.func. + +#map = affine_map<(d0, d1) -> (d0 + d1)> +module { + // SPIRV-LABEL: func @test_avgpool2d_pad_right + spirv.func @test_avgpool2d_pad_right(%arg0: !spirv.array<8192 x f32>) -> !spirv.array<8192 x f32> "None" { + %cst_f32 = spirv.Constant 0.000000e+00 : f32 + %0 = builtin.unrealized_conversion_cast %arg0 : !spirv.array<8192 x f32> to tensor<1x32x32x8xf32> + %padded = tensor.pad %0 low[0, 4, 4, 0] high[0, 4, 8193, 0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst_f32 : f32 + } : tensor<1x32x32x8xf32> to tensor<1x40x8229x8xf32> + %1 = bufferization.to_memref %padded : memref<1x40x8229x8xf32> + %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x8xf32> + affine.for %arg1 = 0 to 1 { + affine.for %arg2 = 0 to 32 { + affine.for %arg3 = 0 to 32 { + affine.for %arg4 = 0 to 8 { + affine.for %arg5 = 0 to 1 { + affine.for %arg6 = 0 to 1 { + %4 = affine.apply #map(%arg2, %arg5) + %5 = affine.apply #map(%arg3, %arg6) + %6 = affine.load %1[%arg1, %4, %5, %arg4] : memref<1x40x8229x8xf32> + %7 = affine.load %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32> + %8 = arith.addf %7, %6 : f32 + affine.store %8, %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32> + } + } + } + } + } + } + %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x8xf32> + affine.for %arg1 = 0 to 1 { + affine.for %arg2 = 0 to 32 { + affine.for %arg3 = 0 to 32 { + affine.for %arg4 = 0 to 8 { + %4 = affine.load %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32> + } + } + } + } + // Test fusion. 
+ // SPIRV: affine.for %{{.*}} = 0 to 1 { + // SPIRV-NEXT: affine.for %{{.*}} = 0 to 32 { + // SPIRV-NEXT: affine.for %{{.*}} = 0 to 32 { + // SPIRV-NEXT: affine.for %{{.*}} = 0 to 8 { + // SPIRV-NOT: affine.for %{{.*}} + + // SPIRV: ReturnValue + %2 = bufferization.to_tensor %alloc_1 : memref<1x32x32x8xf32> + %3 = builtin.unrealized_conversion_cast %2 : tensor<1x32x32x8xf32> to !spirv.array<8192 x f32> + spirv.ReturnValue %3 : !spirv.array<8192 x f32> + } +} diff --git a/mlir/test/Dialect/GPU/barrier-elimination.mlir b/mlir/test/Dialect/GPU/barrier-elimination.mlir index 1f5b84937deb0..7f6619adcd78f 100644 --- a/mlir/test/Dialect/GPU/barrier-elimination.mlir +++ b/mlir/test/Dialect/GPU/barrier-elimination.mlir @@ -182,3 +182,20 @@ attributes {__parallel_region_boundary_for_test} { %4 = memref.load %C[] : memref return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32 } + +// CHECK-LABEL: @nested_loop_barrier_only +func.func @nested_loop_barrier_only() attributes {__parallel_region_boundary_for_test} { + %c0 = arith.constant 0 : index + %c42 = arith.constant 42 : index + %c1 = arith.constant 1 : index + // Note: the barrier can be removed and as consequence the loops get folded + // by the greedy rewriter. 
+ // CHECK-NOT: scf.for + // CHECK-NOT: gpu.barrier + scf.for %j = %c0 to %c42 step %c1 { + scf.for %i = %c0 to %c42 step %c1 { + gpu.barrier + } + } + return +} diff --git a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir index 12bfee9fb6511..5aef6135afd97 100644 --- a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir @@ -699,7 +699,7 @@ func.func @copy_memory_print_maa() { // CHECK-SAME: %[[ARG1:.*]]: i64) // CHECK: spirv.PtrAccessChain %[[ARG0]][%[[ARG1]]] : !spirv.ptr, i64 func.func @ptr_access_chain1(%arg0: !spirv.ptr, %arg1 : i64) -> () { - %0 = spirv.PtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 + %0 = spirv.PtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 -> !spirv.ptr return } @@ -714,6 +714,6 @@ func.func @ptr_access_chain1(%arg0: !spirv.ptr, %arg1 : i64 // CHECK-SAME: %[[ARG1:.*]]: i64) // CHECK: spirv.InBoundsPtrAccessChain %[[ARG0]][%[[ARG1]]] : !spirv.ptr, i64 func.func @inbounds_ptr_access_chain1(%arg0: !spirv.ptr, %arg1 : i64) -> () { - %0 = spirv.InBoundsPtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 + %0 = spirv.InBoundsPtrAccessChain %arg0[%arg1] : !spirv.ptr, i64 -> !spirv.ptr return } diff --git a/mlir/test/IR/locations.mlir b/mlir/test/IR/locations.mlir index 0c6426ebec874..b725307b420b7 100644 --- a/mlir/test/IR/locations.mlir +++ b/mlir/test/IR/locations.mlir @@ -33,6 +33,15 @@ func.func @inline_notation() -> i32 { // CHECK-LABEL: func private @loc_attr(i1 {foo.loc_attr = loc(callsite("foo" at "mysource.cc":10:8))}) func.func private @loc_attr(i1 {foo.loc_attr = loc(callsite("foo" at "mysource.cc":10:8))}) +// CHECK-LABEL: func.func private @filelocrange_attr1(i1 {foo.loc_attr = loc("mysource.cc":10:0)}) +func.func private @filelocrange_attr1(i1 {foo.loc_attr = loc("mysource.cc":10)}) +// CHECK-LABEL: func.func private @filelocrange_attr2(i1 {foo.loc_attr = loc("mysource.cc":10:8)}) +func.func private @filelocrange_attr2(i1 {foo.loc_attr = loc("mysource.cc":10:8)}) 
+// CHECK-LABEL: func.func private @filelocrange_attr3(i1 {foo.loc_attr = loc("mysource.cc":10:8 to :12)}) +func.func private @filelocrange_attr3(i1 {foo.loc_attr = loc("mysource.cc":10:8 to :12)}) +// CHECK-LABEL: func.func private @filelocrange_attr4(i1 {foo.loc_attr = loc("mysource.cc":10:8 to 12:4)}) +func.func private @filelocrange_attr4(i1 {foo.loc_attr = loc("mysource.cc":10:8 to 12:4)}) + // Check that locations get properly escaped. // CHECK-LABEL: func @escape_strings() func.func @escape_strings() { diff --git a/mlir/test/IR/properties.mlir b/mlir/test/IR/properties.mlir index 9a1c49cb7dabf..b339a03812bad 100644 --- a/mlir/test/IR/properties.mlir +++ b/mlir/test/IR/properties.mlir @@ -1,4 +1,4 @@ -// # RUN: mlir-opt %s -split-input-file | mlir-opt |FileCheck %s +// # RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // # RUN: mlir-opt %s -mlir-print-op-generic -split-input-file | mlir-opt -mlir-print-op-generic | FileCheck %s --check-prefix=GENERIC // CHECK: test.with_properties @@ -38,6 +38,14 @@ test.using_property_in_custom [1, 4, 20] // GENERIC-SAME: }> test.using_property_ref_in_custom 1 + 4 = 5 +// Tests that the variadic segment size properties are elided. 
+// CHECK: %[[CI64:.*]] = arith.constant +// CHECK-NEXT: test.variadic_segment_prop %[[CI64]], %[[CI64]] : %[[CI64]] : i64, i64 : i64 end +// GENERIC: %[[CI64:.*]] = "arith.constant"() +// GENERIC-NEXT: "test.variadic_segment_prop"(%[[CI64]], %[[CI64]], %[[CI64]]) <{operandSegmentSizes = array, resultSegmentSizes = array}> : (i64, i64, i64) -> (i64, i64, i64) +%ci64 = arith.constant 0 : i64 +test.variadic_segment_prop %ci64, %ci64 : %ci64 : i64, i64 : i64 end + // CHECK: test.with_default_valued_properties na{{$}} // GENERIC: "test.with_default_valued_properties"() // GENERIC-SAME: <{a = 0 : i32, b = "", c = -1 : i32, unit = false}> : () -> () diff --git a/mlir/test/Transforms/remove-dead-values.mlir b/mlir/test/Transforms/remove-dead-values.mlir index 47137fc6430fe..826f6159a36b6 100644 --- a/mlir/test/Transforms/remove-dead-values.mlir +++ b/mlir/test/Transforms/remove-dead-values.mlir @@ -3,9 +3,12 @@ // The IR is updated regardless of memref.global private constant // module { - memref.global "private" constant @__something_global : memref = dense<0> + // CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64} + memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64} func.func @main(%arg0: i32) -> i32 { %0 = tensor.empty() : tensor<10xbf16> + // CHECK-NOT: memref.get_global + %1 = memref.get_global @__constant_4xi32 : memref<4xi32> // CHECK-NOT: tensor.empty return %arg0 : i32 } @@ -29,7 +32,7 @@ module @named_module_acceptable { // func.func @dont_touch_unacceptable_ir_has_cleanable_simple_op_with_branch_op(%arg0: i1) { %non_live = arith.constant 0 : i32 - // expected-error @+1 {{cannot optimize an IR with non-call symbol user ops or branch ops}} + // expected-error @+1 {{cannot optimize an IR with branch ops}} cf.cond_br %arg0, ^bb1(%non_live : i32), ^bb2(%non_live : i32) ^bb1(%non_live_0 : i32): cf.br ^bb3 diff --git 
a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index e5503ee892042..e05f444afa68f 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -379,15 +379,24 @@ builtin.module { // ----- -// expected-remark @below {{applyPartialConversion failed}} module { - func.func private @callee(%0 : f32) -> f32 - - func.func @caller( %arg: f32) { - // expected-error @below {{failed to legalize}} - %1 = func.call @callee(%arg) : (f32) -> f32 - return - } +// CHECK-LABEL: func.func private @callee() -> (f16, f16) +func.func private @callee() -> (f32, i24) + +// CHECK: func.func @caller() +func.func @caller() { + // f32 is converted to (f16, f16). + // i24 is converted to (). + // CHECK: %[[call:.*]]:2 = call @callee() : () -> (f16, f16) + %0:2 = func.call @callee() : () -> (f32, i24) + + // CHECK: %[[cast1:.*]] = "test.cast"() : () -> i24 + // CHECK: %[[cast0:.*]] = "test.cast"(%[[call]]#0, %[[call]]#1) : (f16, f16) -> f32 + // CHECK: "test.some_user"(%[[cast0]], %[[cast1]]) : (f32, i24) -> () + // expected-remark @below{{'test.some_user' is not legalizable}} + "test.some_user"(%0#0, %0#1) : (f32, i24) -> () + "test.return"() : () -> () +} } // ----- diff --git a/mlir/test/lib/Dialect/LLVM/CMakeLists.txt b/mlir/test/lib/Dialect/LLVM/CMakeLists.txt index 734757ce79da3..6a2f0ba2756d4 100644 --- a/mlir/test/lib/Dialect/LLVM/CMakeLists.txt +++ b/mlir/test/lib/Dialect/LLVM/CMakeLists.txt @@ -1,6 +1,7 @@ # Exclude tests from libMLIR.so add_mlir_library(MLIRLLVMTestPasses TestLowerToLLVM.cpp + TestPatterns.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp new file mode 100644 index 0000000000000..ab02866970b1d --- /dev/null +++ b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp @@ -0,0 +1,77 @@ +//===- TestPatterns.cpp - LLVM dialect test patterns ----------------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +namespace { + +/// Replace this op (which is expected to have 1 result) with the operands. +struct TestDirectReplacementOp : public ConversionPattern { + TestDirectReplacementOp(MLIRContext *ctx, const TypeConverter &converter) + : ConversionPattern(converter, "test.direct_replacement", 1, ctx) {} + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (op->getNumResults() != 1) + return failure(); + rewriter.replaceOpWithMultiple(op, {operands}); + return success(); + } +}; + +struct TestLLVMLegalizePatternsPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestLLVMLegalizePatternsPass) + + StringRef getArgument() const final { return "test-llvm-legalize-patterns"; } + StringRef getDescription() const final { + return "Run LLVM dialect legalization patterns"; + } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnOperation() override { + MLIRContext *ctx = &getContext(); + LLVMTypeConverter converter(ctx); + mlir::RewritePatternSet patterns(ctx); + patterns.add(ctx, converter); + + // Define the conversion target used for the test. + ConversionTarget target(*ctx); + target.addLegalOp(OperationName("test.legal_op", ctx)); + + // Handle a partial conversion. 
+ DenseSet unlegalizedOps; + ConversionConfig config; + config.unlegalizedOps = &unlegalizedOps; + if (failed(applyPartialConversion(getOperation(), target, + std::move(patterns), config))) + getOperation()->emitError() << "applyPartialConversion failed"; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// PassRegistration +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace test { +void registerTestLLVMLegalizePatternsPass() { + PassRegistration(); +} +} // namespace test +} // namespace mlir diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index cfe19a2fd5c08..6752113cab8d4 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -3047,6 +3047,15 @@ def TestOpUsingPropertyInCustomAndOther ); } +def TestOpWithVariadicSegmentProperties : TEST_Op<"variadic_segment_prop", + [AttrSizedOperandSegments, AttrSizedResultSegments]> { + let arguments = (ins Variadic:$a1, Variadic:$a2); + let results = (outs Variadic:$b1, Variadic:$b2); + let assemblyFormat = [{ + $a1 `:` $a2 `:` type($b1) `:` type($b2) prop-dict attr-dict `end` + }]; +} + def TestOpUsingPropertyRefInCustom : TEST_Op<"using_property_ref_in_custom"> { let assemblyFormat = "custom($first) `+` custom($second, ref($first)) attr-dict"; let arguments = (ins IntProperty<"int64_t">:$first, IntProperty<"int64_t">:$second); diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 3df6cff3c0a60..bbd55938718fe 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -1215,6 +1215,11 @@ struct TestTypeConverter : public TypeConverter { return success(); } + // Drop I24 types. + if (t.isInteger(24)) { + return success(); + } + // Otherwise, convert the type directly. 
results.push_back(t); return success(); diff --git a/mlir/test/lib/Dialect/Test/TestTypeDefs.td b/mlir/test/lib/Dialect/Test/TestTypeDefs.td index 830475bed4e44..60108ac86d1ed 100644 --- a/mlir/test/lib/Dialect/Test/TestTypeDefs.td +++ b/mlir/test/lib/Dialect/Test/TestTypeDefs.td @@ -375,7 +375,7 @@ def TestI32 : Test_Type<"TestI32"> { } def TestRecursiveAlias - : Test_Type<"TestRecursiveAlias", [NativeTypeTrait<"IsMutable">]> { + : Test_Type<"TestRecursiveAlias", [MutableType]> { let mnemonic = "test_rec_alias"; let storageClass = "TestRecursiveTypeStorage"; let storageNamespace = "test"; diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index 7496703256235..a794a3fc6fa00 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -1,6 +1,6 @@ # RUN: %PYTHON %s 2>&1 | FileCheck %s -import gc, sys +import gc, os, sys, tempfile from mlir.ir import * from mlir.passmanager import * from mlir.dialects.func import FuncOp @@ -340,3 +340,45 @@ def testPrintIrBeforeAndAfterAll(): # CHECK: } # CHECK: } pm.run(module) + + +# CHECK-LABEL: TEST: testPrintIrTree +@run +def testPrintIrTree(): + with Context() as ctx: + module = ModuleOp.parse( + """ + module { + func.func @main() { + %0 = arith.constant 10 + return + } + } + """ + ) + pm = PassManager.parse("builtin.module(canonicalize)") + ctx.enable_multithreading(False) + pm.enable_ir_printing() + # CHECK-LABEL: // Tree printing begin + # CHECK: \-- builtin_module_no-symbol-name + # CHECK: \-- 0_canonicalize.mlir + # CHECK-LABEL: // Tree printing end + pm.run(module) + log("// Tree printing begin") + with tempfile.TemporaryDirectory() as temp_dir: + pm.enable_ir_printing(tree_printing_dir_path=temp_dir) + pm.run(module) + + def print_file_tree(directory, prefix=""): + entries = sorted(os.listdir(directory)) + for i, entry in enumerate(entries): + path = os.path.join(directory, entry) + connector = "\-- " if i == len(entries) - 1 else "|-- " + 
log(f"{prefix}{connector}{entry}") + if os.path.isdir(path): + print_file_tree( + path, prefix + (" " if i == len(entries) - 1 else "│ ") + ) + + print_file_tree(temp_dir) + log("// Tree printing end") diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 002c3900056de..94bc67a1e9609 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -113,6 +113,7 @@ void registerTestLinalgRankReduceContractionOps(); void registerTestLinalgTransforms(); void registerTestLivenessAnalysisPass(); void registerTestLivenessPass(); +void registerTestLLVMLegalizePatternsPass(); void registerTestLoopFusion(); void registerTestLoopMappingPass(); void registerTestLoopUnrollingPass(); @@ -250,6 +251,7 @@ void registerTestPasses() { mlir::test::registerTestLinalgTransforms(); mlir::test::registerTestLivenessAnalysisPass(); mlir::test::registerTestLivenessPass(); + mlir::test::registerTestLLVMLegalizePatternsPass(); mlir::test::registerTestLoopFusion(); mlir::test::registerTestLoopMappingPass(); mlir::test::registerTestLoopUnrollingPass(); diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 7e2b0694a860a..097a578cb2025 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -2008,17 +2008,34 @@ static void genNonDefaultValueCheck(MethodBody &body, const Operator &op, << "() != " << propElement.getVar()->prop.getDefaultValue(); } +/// Elide the variadic segment size attributes if necessary. +/// This pushes elided attribute names in `elidedStorage`. 
+static void genVariadicSegmentElision(OperationFormat &fmt, Operator &op, + MethodBody &body, + const char *elidedStorage) { + if (!fmt.allOperands && + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) + body << " " << elidedStorage << ".push_back(\"operandSegmentSizes\");\n"; + if (!fmt.allResultTypes && + op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) + body << " " << elidedStorage << ".push_back(\"resultSegmentSizes\");\n"; +} + /// Generate the printer for the 'prop-dict' directive. static void genPropDictPrinter(OperationFormat &fmt, Operator &op, MethodBody &body) { body << " ::llvm::SmallVector<::llvm::StringRef, 2> elidedProps;\n"; + + genVariadicSegmentElision(fmt, op, body, "elidedProps"); + for (const NamedProperty *namedProperty : fmt.usedProperties) body << " elidedProps.push_back(\"" << namedProperty->name << "\");\n"; for (const NamedAttribute *namedAttr : fmt.usedAttributes) body << " elidedProps.push_back(\"" << namedAttr->name << "\");\n"; - // Add code to check attributes for equality with the default value - // for attributes with the elidePrintingDefaultValue bit set. + // Add code to check attributes for equality with their default values. + // Default-valued attributes will not be printed when their value matches the + // default. for (const NamedAttribute &namedAttr : op.getAttributes()) { const Attribute &attr = namedAttr.attr; if (!attr.isDerivedAttr() && attr.hasDefaultValue()) { @@ -2057,19 +2074,17 @@ static void genPropDictPrinter(OperationFormat &fmt, Operator &op, static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, MethodBody &body, bool withKeyword) { body << " ::llvm::SmallVector<::llvm::StringRef, 2> elidedAttrs;\n"; - // Elide the variadic segment size attributes if necessary. 
- if (!fmt.allOperands && - op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) - body << " elidedAttrs.push_back(\"operandSegmentSizes\");\n"; - if (!fmt.allResultTypes && - op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) - body << " elidedAttrs.push_back(\"resultSegmentSizes\");\n"; + + genVariadicSegmentElision(fmt, op, body, "elidedAttrs"); + for (const StringRef key : fmt.inferredAttributes.keys()) body << " elidedAttrs.push_back(\"" << key << "\");\n"; for (const NamedAttribute *attr : fmt.usedAttributes) body << " elidedAttrs.push_back(\"" << attr->name << "\");\n"; - // Add code to check attributes for equality with the default value - // for attributes with the elidePrintingDefaultValue bit set. + + // Add code to check attributes for equality with their default values. + // Default-valued attributes will not be printed when their value matches the + // default. for (const NamedAttribute &namedAttr : op.getAttributes()) { const Attribute &attr = namedAttr.attr; if (!attr.isDerivedAttr() && attr.hasDefaultValue()) { diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp index 3aefcff68e195..881bd12f03405 100644 --- a/offload/DeviceRTL/src/Mapping.cpp +++ b/offload/DeviceRTL/src/Mapping.cpp @@ -25,7 +25,6 @@ namespace ompx { namespace impl { // Forward declarations defined to be defined for AMDGCN and NVPTX. 
-const llvm::omp::GV &getGridValue(); LaneMaskTy activemask(); LaneMaskTy lanemaskLT(); LaneMaskTy lanemaskGT(); @@ -37,15 +36,14 @@ uint32_t getBlockIdInKernel(int32_t Dim); uint32_t getNumberOfBlocksInKernel(int32_t Dim); uint32_t getWarpIdInBlock(); uint32_t getNumberOfWarpsInBlock(); +uint32_t getWarpSize(); /// AMDGCN Implementation /// ///{ #pragma omp begin declare variant match(device = {arch(amdgcn)}) -const llvm::omp::GV &getGridValue() { - return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>(); -} +uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); } uint32_t getNumberOfThreadsInBlock(int32_t Dim) { switch (Dim) { @@ -152,7 +150,7 @@ uint32_t getNumberOfThreadsInBlock(int32_t Dim) { UNREACHABLE("Dim outside range!"); } -const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; } +uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); } LaneMaskTy activemask() { return __nvvm_activemask(); } @@ -219,8 +217,6 @@ uint32_t getNumberOfWarpsInBlock() { #pragma omp end declare variant ///} -uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; } - } // namespace impl } // namespace ompx diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index fde4b2f930349..3ed5c02ed4a3b 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -34,6 +34,7 @@ elseif(${LIBOMPTARGET_GPU_LIBC_SUPPORT}) # We may need to get the headers directly from the 'libc' source directory. 
target_include_directories(PluginCommon PRIVATE ${CMAKE_SOURCE_DIR}/../libc/utils/gpu/server + ${CMAKE_SOURCE_DIR}/../libc/ ${CMAKE_SOURCE_DIR}/../libc/include) endif() endif() diff --git a/offload/plugins-nextgen/common/include/RPC.h b/offload/plugins-nextgen/common/include/RPC.h index 01bf539bcb3f3..5b9b7ffd086b5 100644 --- a/offload/plugins-nextgen/common/include/RPC.h +++ b/offload/plugins-nextgen/common/include/RPC.h @@ -61,7 +61,7 @@ struct RPCServerTy { private: /// Array from this device's identifier to its attached devices. - llvm::SmallVector Handles; + llvm::SmallVector Buffers; }; } // namespace llvm::omp::target diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp index faa2cbd4f02fe..be41928111da4 100644 --- a/offload/plugins-nextgen/common/src/RPC.cpp +++ b/offload/plugins-nextgen/common/src/RPC.cpp @@ -12,9 +12,11 @@ #include "PluginInterface.h" +// TODO: This should be included unconditionally and cleaned up. #if defined(LIBOMPTARGET_RPC_SUPPORT) -#include "llvm-libc-types/rpc_opcodes_t.h" #include "llvmlibc_rpc_server.h" +#include "shared/rpc.h" +#include "shared/rpc_opcodes.h" #endif using namespace llvm; @@ -22,14 +24,14 @@ using namespace omp; using namespace target; RPCServerTy::RPCServerTy(plugin::GenericPluginTy &Plugin) - : Handles(Plugin.getNumDevices()) {} + : Buffers(Plugin.getNumDevices()) {} llvm::Expected RPCServerTy::isDeviceUsingRPC(plugin::GenericDeviceTy &Device, plugin::GenericGlobalHandlerTy &Handler, plugin::DeviceImageTy &Image) { #ifdef LIBOMPTARGET_RPC_SUPPORT - return Handler.isSymbolInImage(Device, Image, rpc_client_symbol_name); + return Handler.isSymbolInImage(Device, Image, "__llvm_libc_rpc_client"); #else return false; #endif @@ -39,59 +41,18 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device, plugin::GenericGlobalHandlerTy &Handler, plugin::DeviceImageTy &Image) { #ifdef LIBOMPTARGET_RPC_SUPPORT - auto Alloc = [](uint64_t Size, void *Data) { - 
plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - return Device.allocate(Size, nullptr, TARGET_ALLOC_HOST); - }; uint64_t NumPorts = - std::min(Device.requestedRPCPortCount(), RPC_MAXIMUM_PORT_COUNT); - rpc_device_t RPCDevice; - if (rpc_status_t Err = rpc_server_init(&RPCDevice, NumPorts, - Device.getWarpSize(), Alloc, &Device)) + std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT); + void *RPCBuffer = Device.allocate( + rpc::Server::allocation_size(Device.getWarpSize(), NumPorts), nullptr, + TARGET_ALLOC_HOST); + if (!RPCBuffer) return plugin::Plugin::error( - "Failed to initialize RPC server for device %d: %d", - Device.getDeviceId(), Err); - - // Register a custom opcode handler to perform plugin specific allocation. - auto MallocHandler = [](rpc_port_t Port, void *Data) { - rpc_recv_and_send( - Port, - [](rpc_buffer_t *Buffer, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - Buffer->data[0] = reinterpret_cast(Device.allocate( - Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING)); - }, - Data); - }; - if (rpc_status_t Err = - rpc_register_callback(RPCDevice, RPC_MALLOC, MallocHandler, &Device)) - return plugin::Plugin::error( - "Failed to register RPC malloc handler for device %d: %d\n", - Device.getDeviceId(), Err); - - // Register a custom opcode handler to perform plugin specific deallocation. 
- auto FreeHandler = [](rpc_port_t Port, void *Data) { - rpc_recv( - Port, - [](rpc_buffer_t *Buffer, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - Device.free(reinterpret_cast(Buffer->data[0]), - TARGET_ALLOC_DEVICE_NON_BLOCKING); - }, - Data); - }; - if (rpc_status_t Err = - rpc_register_callback(RPCDevice, RPC_FREE, FreeHandler, &Device)) - return plugin::Plugin::error( - "Failed to register RPC free handler for device %d: %d\n", - Device.getDeviceId(), Err); + "Failed to initialize RPC server for device %d", Device.getDeviceId()); // Get the address of the RPC client from the device. void *ClientPtr; - plugin::GlobalTy ClientGlobal(rpc_client_symbol_name, sizeof(void *)); + plugin::GlobalTy ClientGlobal("__llvm_libc_rpc_client", sizeof(void *)); if (auto Err = Handler.getGlobalMetadataFromDevice(Device, Image, ClientGlobal)) return Err; @@ -100,38 +61,63 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device, sizeof(void *), nullptr)) return Err; - const void *ClientBuffer = rpc_get_client_buffer(RPCDevice); - if (auto Err = Device.dataSubmit(ClientPtr, ClientBuffer, - rpc_get_client_size(), nullptr)) + rpc::Client client(NumPorts, RPCBuffer); + if (auto Err = + Device.dataSubmit(ClientPtr, &client, sizeof(rpc::Client), nullptr)) return Err; - Handles[Device.getDeviceId()] = RPCDevice.handle; + Buffers[Device.getDeviceId()] = RPCBuffer; + + return Error::success(); + #endif return Error::success(); } Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) { #ifdef LIBOMPTARGET_RPC_SUPPORT - rpc_device_t RPCDevice{Handles[Device.getDeviceId()]}; - if (rpc_status_t Err = rpc_handle_server(RPCDevice)) - return plugin::Plugin::error( - "Error while running RPC server on device %d: %d", Device.getDeviceId(), - Err); + uint64_t NumPorts = + std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT); + rpc::Server Server(NumPorts, Buffers[Device.getDeviceId()]); + + auto port = Server.try_open(Device.getWarpSize()); 
+ if (!port) + return Error::success(); + + int Status = rpc::SUCCESS; + switch (port->get_opcode()) { + case RPC_MALLOC: { + port->recv_and_send([&](rpc::Buffer *Buffer, uint32_t) { + Buffer->data[0] = reinterpret_cast(Device.allocate( + Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING)); + }); + break; + } + case RPC_FREE: { + port->recv([&](rpc::Buffer *Buffer, uint32_t) { + Device.free(reinterpret_cast(Buffer->data[0]), + TARGET_ALLOC_DEVICE_NON_BLOCKING); + }); + break; + } + default: + // Let the `libc` library handle any other unhandled opcodes. + Status = libc_handle_rpc_port(&*port, Device.getWarpSize()); + break; + } + port->close(); + + if (Status != rpc::SUCCESS) + return createStringError("RPC server given invalid opcode!"); + + return Error::success(); #endif return Error::success(); } Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) { #ifdef LIBOMPTARGET_RPC_SUPPORT - rpc_device_t RPCDevice{Handles[Device.getDeviceId()]}; - auto Dealloc = [](void *Ptr, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - Device.free(Ptr, TARGET_ALLOC_HOST); - }; - if (rpc_status_t Err = rpc_server_shutdown(RPCDevice, Dealloc, &Device)) - return plugin::Plugin::error( - "Failed to shut down RPC server for device %d: %d", - Device.getDeviceId(), Err); + Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST); + return Error::success(); #endif return Error::success(); } diff --git a/offload/test/offloading/ompx_bare_ballot_sync.c b/offload/test/offloading/ompx_bare_ballot_sync.c index 101d1255f0d67..b810fb404b58f 100644 --- a/offload/test/offloading/ompx_bare_ballot_sync.c +++ b/offload/test/offloading/ompx_bare_ballot_sync.c @@ -8,22 +8,33 @@ #include #include +#pragma omp begin declare variant match(device = {arch(amdgcn)}) +unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {arch(nvptx64)}) +unsigned 
get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +unsigned get_warp_size() { return 1; } +#pragma omp end declare variant + int main(int argc, char *argv[]) { const int num_blocks = 1; const int block_size = 256; const int N = num_blocks * block_size; int *res = (int *)malloc(N * sizeof(int)); -#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \ - map(from: res[0:N]) +#pragma omp target teams ompx_bare num_teams(num_blocks) \ + thread_limit(block_size) map(from : res[0 : N]) { int tid = ompx_thread_id_x(); uint64_t mask = ompx_ballot_sync(~0LU, tid & 0x1); -#if defined __AMDGCN_WAVEFRONT_SIZE && __AMDGCN_WAVEFRONT_SIZE == 64 - res[tid] = mask == 0xaaaaaaaaaaaaaaaa; -#else - res[tid] = mask == 0xaaaaaaaa; -#endif + if (get_warp_size() == 64) + res[tid] = mask == 0xaaaaaaaaaaaaaaaa; + else + res[tid] = mask == 0xaaaaaaaa; } for (int i = 0; i < N; ++i) diff --git a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp index 9b0e66e25f68c..311999918de85 100644 --- a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp +++ b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp @@ -10,6 +10,18 @@ #include #include +#pragma omp begin declare variant match(device = {arch(amdgcn)}) +unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {arch(nvptx64)}) +unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +unsigned get_warp_size() { return 1; } +#pragma omp end declare variant + template ::value, bool> = true> bool equal(T LHS, T RHS) { return LHS == RHS; @@ -32,11 +44,7 @@ template void test() { { int tid = ompx_thread_id_x(); T val = ompx::shfl_down_sync(~0U, 
static_cast(tid), 1); -#ifdef __AMDGCN_WAVEFRONT_SIZE - int warp_size = __AMDGCN_WAVEFRONT_SIZE; -#else - int warp_size = 32; -#endif + int warp_size = get_warp_size(); if ((tid & (warp_size - 1)) != warp_size - 1) res[tid] = equal(val, static_cast(tid + 1)); else diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 832a7d0c19359..ef8ec171a1657 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -1,6 +1,18 @@ # This file handles building LLVM runtime sub-projects. cmake_minimum_required(VERSION 3.20.0) +# This file can be used in two ways: the bootstrapping build calls it from +# llvm/runtimes/CMakeLists.txt where we reuse the build tree of the top-level +# build or it can be directly invoked in this directory. In the latter case we +# might be building against a LLVM install tree and might not have a valid build +# tree set up yet. We can detect whether we are using the bootstrapping build +# by checking for the HAVE_LLVM_LIT flag that is passed explicitly to +# llvm_ExternalProject_Add(). +if (HAVE_LLVM_LIT) + message(STATUS "Performing bootstrapping runtimes build.") +else() + message(STATUS "Performing standalone runtimes build.") +endif() # Add path for custom and the LLVM build's modules to the CMake module path. set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") include(${LLVM_COMMON_CMAKE_UTILS}/Modules/CMakePolicy.cmake @@ -236,6 +248,25 @@ foreach(entry ${runtimes}) endforeach() if(LLVM_INCLUDE_TESTS) + # If built with the runtimes build (rooted at runtimes/CMakeLists.txt), we + # won't have llvm-lit. If built with the bootstrapping build (rooted at + # llvm/CMakeLists.txt), the top-level llvm CMake invocation already generated + # the llvm-lit script. + if (NOT HAVE_LLVM_LIT) + # Ensure that the appropriate variables for lit are set before adding any + # runtimes since their CMake tests configuration might depend on lit being + # present. 
This ensures that the testsuites use a local lit from the build + # dir rather than ${LLVM_INSTALL_DIR}/bin/llvm-lit (which may not exist if + # LLVM_BINARY_DIR points at an installed LLVM tree rather than a build tree). + set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin) + get_llvm_lit_path(_base_dir _file_name) + set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE) + # Avoid warning about missing llvm-lit from runtimes CMake files. This is + # fine since we call configure_file() to create llvm-lit at the end of this + # file (after recursing into all runtimes' CMake logic), so it will exist. + set(LLVM_EXTERNAL_LIT_MISSING_WARNED_ONCE YES CACHE INTERNAL "") + endif() + set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported") if (MSVC OR XCODE) set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") @@ -273,6 +304,8 @@ if(LLVM_INCLUDE_TESTS) # If built by manually invoking cmake on this directory, we don't have # llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake # invocation already generated the llvm-lit script. + # NOTE: this must be called after all testsuites have been added, since + # otherwise the generated llvm-lit does not have all required path mappings. 
add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit) endif() @@ -306,10 +339,10 @@ if(SUB_COMPONENTS) if(LLVM_RUNTIMES_TARGET) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in - ${LLVM_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake) + ${CMAKE_CURRENT_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake) else() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in - ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + ${CMAKE_CURRENT_BINARY_DIR}/runtimes/Components.cmake) endif() endif() diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 0628947540ca7..b3c7108d840d3 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -1220,6 +1220,7 @@ cc_library( ":Core", ":MC", ":Object", + ":ProfileData", ":Support", ":TargetParser", ":config", diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 74f13788ab29f..51d72d2e5f5b2 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4886,7 +4886,9 @@ cc_library( ":FuncExtensions", ":FuncToLLVM", ":FuncTransformOps", + ":GPUToGPURuntimeTransforms", ":GPUToLLVMIRTranslation", + ":GPUToNVVMTransforms", ":GPUTransformOps", ":IndexToLLVM", ":LLVMToLLVMIRTranslation", @@ -5907,6 +5909,7 @@ cc_library( ":ControlFlowDialect", ":ControlFlowToLLVM", ":ConversionPassIncGen", + ":ConvertToLLVMInterface", ":FuncDialect", ":FuncToLLVM", ":GPUCommonTransforms", @@ -6088,6 +6091,7 @@ cc_library( hdrs = [ "include/mlir/Conversion/GPUCommon/AttrToSPIRVConverter.h", "include/mlir/Conversion/GPUCommon/GPUCommonPass.h", + "include/mlir/Conversion/GPUCommon/GPUToLLVM.h", "lib/Conversion/GPUCommon/GPUOpsLowering.h", ], includes = ["include"], @@ -8374,6 +8378,31 @@ cc_library( ], ) +gentbl_cc_library( + 
name = "ToLLVMInterfaceIncGen", + tbl_outs = [ + ( + ["--gen-attr-interface-decls"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.h.inc", + ), + ( + ["--gen-attr-interface-defs"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.cpp.inc", + ), + ( + ["--gen-op-interface-decls"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.h.inc", + ), + ( + ["--gen-op-interface-defs"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td", + deps = [":UBDialectTdFiles"], +) + cc_library( name = "ConvertToLLVMInterface", srcs = ["lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp"], @@ -8382,6 +8411,7 @@ cc_library( deps = [ ":IR", ":Support", + ":ToLLVMInterfaceIncGen", "//llvm:Support", ], ) @@ -8394,6 +8424,7 @@ cc_library( deps = [ ":ConversionPassIncGen", ":ConvertToLLVMInterface", + ":Analysis", ":IR", ":LLVMCommonConversion", ":LLVMDialect", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index c69f793943bee..688edacbc93bf 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -807,6 +807,7 @@ cc_library( "//mlir:FuncToLLVM", "//mlir:IR", "//mlir:IndexToLLVM", + "//mlir:LLVMCommonConversion", "//mlir:LLVMDialect", "//mlir:LinalgTransforms", "//mlir:MathToLLVM", @@ -815,6 +816,7 @@ cc_library( "//mlir:Pass", "//mlir:ReconcileUnrealizedCasts", "//mlir:SCFToControlFlow", + "//mlir:TransformUtils", "//mlir:Transforms", "//mlir:VectorToLLVM", "//mlir:VectorToSCF",