llvm · wzssyqa · Jan 24, 2025 · Jan 14, 2025 · Jan 15, 2025
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -42,6 +42,7 @@
 #include "llvm/TargetParser/Triple.h"
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <iterator>
 #include <utility>
@@ -59,6 +60,45 @@ static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                             "stores to their single precision "
                                             "counterparts"));
 
+// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
+// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
+// INST.h for v16, INST.w for v32, INST.d for v64.
+TargetLoweringBase::LegalizeTypeAction
+MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
+  if (this->Subtarget.hasMSA()) {
+    switch (VT.SimpleTy) {
+    // Leave v2i1 vectors to be promoted to larger ones.
+    // Other i1 types will be promoted by default.
+    case MVT::v2i1:
+      return TypePromoteInteger;
+      break;
+    // 16-bit vector types (v2 and longer)
+    case MVT::v2i8:
+    // 32-bit vector types (v2 and longer)
+    case MVT::v2i16:
+    case MVT::v4i8:
+    // 64-bit vector types (v2 and longer)
+    case MVT::v2i32:
+    case MVT::v4i16:
+    case MVT::v8i8:
+      return TypeWidenVector;
+      break;
+    // Only word (.w) and doubleword (.d) are available for floating point
+    // vectors. That means floating point vectors should be either v2f64
+    // or v4f32.
+    // Here we only explicitly widen the f32 types - f16 will be promoted
+    // by default.
+    case MVT::v2f32:
+    case MVT::v3f32:
+      return TypeWidenVector;
+    // v2i64 is already 128-bit wide.
+    default:
+      break;
+    }
+  }
+  return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                            const MipsSubtarget &STI)
     : MipsTargetLowering(TM, STI) {
@@ -2929,8 +2969,14 @@ static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
 // if the type is v8i16 and all the indices are less than 8 then the second
 // operand is unused and can be replaced with anything. We choose to replace it
 // with the used operand since this reduces the number of instructions overall.
+//
+// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
+//       UNDEFs as same as SPLATI index.
+//       For other instances we use the last valid index if UNDEF is
+//       encountered.
 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                         const SmallVector<int, 16> &Indices,
+                                        const bool isSPLATI,
                                         SelectionDAG &DAG) {
   SmallVector<SDValue, 16> Ops;
   SDValue Op0;
@@ -2942,6 +2988,9 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
   SDLoc DL(Op);
   int ResTyNumElts = ResTy.getVectorNumElements();
 
+  assert(Indices[0] >= 0 &&
+         "shuffle mask starts with an UNDEF, which is not expected");
+
   for (int i = 0; i < ResTyNumElts; ++i) {
     // Idx == -1 means UNDEF
     int Idx = Indices[i];
@@ -2951,9 +3000,17 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
     if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
       Using2ndVec = true;
   }
-
-  for (int Idx : Indices)
+  int LastValidIndex = 0;
+  for (size_t i = 0; i < Indices.size(); i++) {
+    int Idx = Indices[i];
+    if (Idx < 0) {
+      // Continue using splati index or use the last valid index.
+      Idx = isSPLATI ? Indices[0] : LastValidIndex;
+    } else {
+      LastValidIndex = Idx;
+    }
     Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
+  }
 
   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
 
@@ -2996,7 +3053,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
   // splati.[bhwd] is preferable to the others but is matched from
   // MipsISD::VSHF.
   if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
-    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
   SDValue Result;
   if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
     return Result;
@@ -3012,7 +3069,7 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
     return Result;
-  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
 }
 
 MachineBasicBlock *

diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.h b/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -45,6 +45,9 @@ class TargetRegisterClass;
         MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
         unsigned *Fast = nullptr) const override;
 
+    TargetLoweringBase::LegalizeTypeAction
+    getPreferredVectorAction(MVT VT) const override;
+
     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;