Responded to PR comments. Cleaned up a lot of helper functions and fixed a few metadata issues
UoB-HPC · Dec 18, 2024 · 7b07570 · 7b07570
1 parent f643681
commit 7b07570
Show file tree

Hide file tree

Showing 7 changed files with 163 additions and 192 deletions.
diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh
@@ -558,6 +558,35 @@ RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues,
   return {out, 256};
 }
 
+/** Helper function for NEON instructions with the format `uaddlv Vd, Vn.T`.
+ * T represents the type of the destination register (e.g. for h0, T =
+ * uint32_t).
+ * U represents the type of sourceValues[0] (e.g. for v0.8b, U = uint8_t).
+ * I represents the number of elements of sourceValues[0] that are summed.
+ * Returns correctly formatted RegisterValue. */
+template <typename T, typename U, int I>
+RegisterValue vecAddlv(srcValContainer& sourceValues) {
+  const U* n = sourceValues[0].getAsVector<U>();
+  T out = 0;  // Accumulate in T rather than in the source element type U.
+  for (int i = 0; i < I; i++) {
+    out += n[i];
+  }
+  return {out, 256};
+}
+
+/** Helper function for NEON instructions with the format `umaxv Vd, Vn.T`.
+ * T represents the type of sourceValues (e.g. for vn.4s, T = uint32_t); I is
+ * the element count scanned. Returns correctly formatted RegisterValue. */
+template <typename T, int I>
+RegisterValue vecUMaxV(srcValContainer& sourceValues) {
+  const T* n = sourceValues[0].getAsVector<T>();
+  T out = n[0];  // Seed with element 0 so the scan can start at index 1.
+  for (int i = 1; i < I; i++) {
+    out = std::max(n[i], out);
+  }
+  return {out, 256};
+}
+
 /** Helper function for NEON instructions with the format `umaxp vd, vn, vm`.
  * T represents the type of sourceValues (e.g. for vn.2d, T = uint64_t).
  * I represents the number of elements in the output array to be updated (e.g.

diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh
@@ -114,33 +114,6 @@ RegisterValue sveAddvPredicated(srcValContainer& sourceValues,
   return {out, 256};
 }
 
-/** Helper function for NEON instructions with the format `uaddlv Vd, Vn.T`.
- * T represents the type of the destination register (e.g. for h0, T =
- * uint32_t). U represents the type of the sourceValues[0] (e.g. for v0.8b, U =
- * uint8_t) Returns correctly formatted RegisterValue. */
-template <typename T, typename U, int I>
-RegisterValue sveAddlv(srcValContainer& sourceValues) {
-  const U* n = sourceValues[0].getAsVector<U>();
-  T out = 0;
-  for (int i = 0; i < I; i++) {
-    out += n[i];
-  }
-  return {out, 256};
-}
-
-/** Helper function for NEON instructions with the format `umaxv Vd, Vn.T`.
- * T represents the type of sourceValues (e.g. for vn.s, T = uint32_t).
- * Returns correctly formatted RegisterValue. */
-template <typename T, int I>
-RegisterValue sveUMaxV(srcValContainer& sourceValues) {
-  const T* n = sourceValues[0].getAsVector<T>();
-  T out = n[0];
-  for (int i = 1; i < I; i++) {
-    out = std::max(n[i], out);
-  }
-  return {out, 256};
-}
-
 /** Helper function for SVE instructions with the format `adr zd, [zn, zm{,
  * lsl #<1,2,3>}]`.
  * T represents the type of sourceValues (e.g. for zn.d, T = uint64_t).
@@ -284,7 +257,7 @@ RegisterValue sveCpy_imm(
   return {out, 256};
 }
 
-/** Helper function for SVE instructions with the format `cpy zd, pg/m, vn
+/** Helper function for SVE instructions with the format `cpy zd, pg/m, rn`.
  * T represents the type of sourceValues (e.g. for zd.d, T = int64_t).
  * Returns correctly formatted RegisterValue. */
 template <typename T>
@@ -294,15 +267,15 @@ RegisterValue sveCpy_Scalar(
     const uint16_t VL_bits) {
   const T* zd = sourceValues[0].getAsVector<T>();
   const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
-  const T vn = sourceValues[2].get<T>();
+  const T rn = sourceValues[2].get<T>();
 
   const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
   T out[256 / sizeof(T)] = {0};
 
   for (int i = 0; i < partition_num; i++) {
     uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T));
     if (p[i / (64 / sizeof(T))] & shifted_active) {
-      out[i] = vn;
+      out[i] = rn;
     } else {
       out[i] = zd[i];
     }
@@ -956,7 +929,8 @@ RegisterValue sveFTrigSSel(srcValContainer& sourceValues,
 }
 
 /** Helper function for SVE instructions with the format `ftmad zd, zn, zm,
- * #imm`. T represents the type of sourceValues (e.g. for zn.d, T = double).
+ * #imm`.
+ * T represents the type of sourceValues (e.g. for zn.d, T = double).
  * Returns correctly formatted RegisterValue. **/
 template <typename T>
 RegisterValue sveFTrigMad(
@@ -1110,15 +1084,14 @@ RegisterValue sveIndex(
   return {out, 256};
 }
 
-/** Helper function for SVE instructions with the format `lastb vd, pg, zn`.
+/** Helper function for SVE instructions with the format `lastb rd, pg, zn`.
  * T represents the vector register type (e.g. zd.d would be uint64_t).
  * Returns correctly formatted RegisterValue. */
 template <typename T>
 RegisterValue sveLastBScalar(srcValContainer& sourceValues,
                              const uint16_t VL_bits) {
-  // sourceValues are wrong and the correct value is in the previous index.
-  const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
-  const T* n = sourceValues[2].getAsVector<T>();
+  const uint64_t* p = sourceValues[0].getAsVector<uint64_t>();
+  const T* n = sourceValues[1].getAsVector<T>();
 
   const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
   T out;
@@ -1139,15 +1112,15 @@ RegisterValue sveLastBScalar(srcValContainer& sourceValues,
   return {out, 256};
 }
 
-/** Helper function for SVE instructions with the format `clastb vd, pg, vd,
- * zn`. T represents the vector register type (e.g. zd.d would be uint64_t).
+/** Helper function for SVE instructions with the format `clastb rd, pg, rd,
+ * zn`.
+ * T represents the vector register type (e.g. zd.d would be uint64_t).
  * Returns correctly formatted RegisterValue. */
 template <typename T>
 RegisterValue sveCLastBScalar(srcValContainer& sourceValues,
                               const uint16_t VL_bits) {
-  // sourceValues are wrong and the correct value is in the previous index.
   const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
-  const uint64_t* m = sourceValues[2].getAsVector<uint64_t>();
+  const uint64_t m = sourceValues[2].get<T>();
   const T* n = sourceValues[3].getAsVector<T>();
 
   const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
@@ -1164,9 +1137,9 @@ RegisterValue sveCLastBScalar(srcValContainer& sourceValues,
   }
 
   if (lastElem < 0) {
-    out = static_cast<uint64_t>(static_cast<T>(m[0]));
+    out = m;
   } else {
-    out = static_cast<uint64_t>(static_cast<T>(n[lastElem]));
+    out = n[lastElem];
   }
   return {out, 256};
 }
@@ -1504,7 +1477,8 @@ RegisterValue sveOrr_3vecs(srcValContainer& sourceValues,
 /** Helper function for SVE2 instructions with the format `psel pd, pn,
  * pm.t[wa, #imm]`.
  * T represents the type of sourceValues (e.g. for pm.d, T =
- * uint64_t). Returns an array of 4 uint64_t elements. */
+ * uint64_t).
+ * Returns an array of 4 uint64_t elements. */
 template <typename T>
 std::array<uint64_t, 4> svePsel(
     srcValContainer& sourceValues,
@@ -1528,12 +1502,13 @@ std::array<uint64_t, 4> svePsel(
   return out;
 }
 
-/** Helper function for SVE instructions with the format `pfirst pdn, pg, pdn`.
- * Returns an array of 4 uint64_t elements. */
-std::array<uint64_t, 4> svePfirst(srcValContainer& sourceValues,
-                                  const uint16_t VL_bits) {
+/** Helper function for SVE instructions with the format `pfirst pdn.b, pg,
+ * pdn.b`.
+ * Returns an array of 4 uint64_t elements, and updates the NZCV flags.
+ */
+std::tuple<std::array<uint64_t, 4>, uint8_t> svePfirst(
+    srcValContainer& sourceValues, const uint16_t VL_bits) {
   const uint16_t partition_num = VL_bits / 8;
-  // sourceValues are wrong and the correct value is in the previous index.
   const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
   const uint64_t* dn = sourceValues[2].getAsVector<uint64_t>();
   // Set destination d as source n to copy all false lanes and the active lanes
@@ -1547,10 +1522,11 @@ std::array<uint64_t, 4> svePfirst(srcValContainer& sourceValues,
       break;
     }
   }
-  return out;
+  return {out, getNZCVfromPred(out, VL_bits, 1)};
 }
 
 /** Helper function for SVE instructions with the format `pnext pdn, pv, pdn`.
+ * T represents the type of sourceValues (e.g. for pdn.d, T = uint64_t).
  * Returns an array of 4 uint64_t elements, and updates the NZCV flags. */
 template <typename T>
 std::tuple<std::array<uint64_t, 4>, uint8_t> svePnext(
@@ -1563,21 +1539,13 @@ std::tuple<std::array<uint64_t, 4>, uint8_t> svePnext(
   // Set destination elements to 0
   std::array<uint64_t, 4> out = {0, 0, 0, 0};
 
-  // Get pattern
-  const uint16_t count =
-      sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits);
-
-  // Exit early if count == 0
-  if (count == 0) return {out, getNZCVfromPred(out, VL_bits, sizeof(T))};
   // Get last active element of dn.pattern
   int lastElem = -1;
   for (int i = partition_num - 1; i >= 0; i--) {
-    if (i < count) {
-      uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T));
-      if (dn[i / (64 / sizeof(T))] & shifted_active) {
-        lastElem = i;
-        break;
-      }
+    uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T));
+    if (dn[i / (64 / sizeof(T))] & shifted_active) {
+      lastElem = i;
+      break;
     }
   }
   // Get next active element of p, starting from last of dn.pattern

diff --git a/src/lib/arch/aarch64/InstructionMetadata.cc b/src/lib/arch/aarch64/InstructionMetadata.cc
@@ -563,8 +563,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
       operands[0].access = CS_AC_READ | CS_AC_WRITE;
       operands[1].access = CS_AC_READ;
       operands[2].access = CS_AC_READ;
-      operands[3].access = CS_AC_READ;
-      operands[3].type = ARM64_OP_IMM;
       break;
     }
     case Opcode::AArch64_FCMLA_ZPmZZ_D: {
@@ -573,7 +571,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
       operands[1].access = CS_AC_READ;
       operands[2].access = CS_AC_READ;
       operands[3].access = CS_AC_READ;
-      operands[4].access = CS_AC_READ;
       operands[4].type = ARM64_OP_IMM;
       break;
     }
@@ -583,7 +580,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
       operands[1].access = CS_AC_READ;
       operands[2].access = CS_AC_READ;
       operands[3].access = CS_AC_READ;
-      operands[4].access = CS_AC_READ;
       operands[4].type = ARM64_OP_IMM;
       break;
     }
@@ -1731,8 +1727,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
       operands[4].access = CS_AC_READ;
       operands[5].access = CS_AC_READ;
       break;
-    case Opcode::AArch64_PFIRST_B:
-      [[fallthrough]];
     case Opcode::AArch64_LASTB_VPZ_D:
       [[fallthrough]];
     case Opcode::AArch64_LASTB_VPZ_S:
@@ -1743,6 +1737,7 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
       operands[0].access = CS_AC_WRITE;
       operands[1].access = CS_AC_READ;
       operands[2].access = CS_AC_READ;
+      break;
     }
     case Opcode::AArch64_CLASTB_VPZ_D:
       [[fallthrough]];
@@ -1757,6 +1752,8 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
       operands[3].access = CS_AC_READ;
       break;
     }
+    case Opcode::AArch64_PFIRST_B:
+      [[fallthrough]];
     case Opcode::AArch64_PNEXT_D:
       [[fallthrough]];
     case Opcode::AArch64_PNEXT_S:
@@ -2535,8 +2532,6 @@ void InstructionMetadata::revertAliasing() {
 
         operands[2].type = ARM64_OP_REG;
         operands[2].access = CS_AC_READ;
-        operands[3].type = ARM64_OP_IMM;
-        operands[3].access = CS_AC_READ;
         return;
       }
       return aliasNYI();

diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc
@@ -206,7 +206,7 @@ void Instruction::execute() {
         break;
       }
       case Opcode::AArch64_UADDLVv8i8v: {  // uaddlv hd, vn.8b
-        results_[0] = sveAddlv<uint32_t, uint8_t, 8>(sourceValues_);
+        results_[0] = vecAddlv<uint32_t, uint8_t, 8>(sourceValues_);
         break;
       }
       case Opcode::AArch64_ADDWri: {  // add wd, wn, #imm{, shift}
@@ -4149,7 +4149,9 @@ void Instruction::execute() {
         break;
       }
       case Opcode::AArch64_PFIRST_B: {  // pfirst pdn.b, pg, pdn.b
-        results_[0] = svePfirst(sourceValues_, VL_bits);
+        auto [result, nzcv] = svePfirst(sourceValues_, VL_bits);
+        results_[0] = nzcv;
+        results_[1] = result;
         break;
       }
       case Opcode::AArch64_PNEXT_B: {  // pnext pdn.b, pv, pdn.b
@@ -5166,6 +5168,7 @@ void Instruction::execute() {
       }
       case Opcode::AArch64_STLRW:    // stlr wt, [xn]
       case Opcode::AArch64_STLRX: {  // stlr xt, [xn]
+        // STORE
         memoryData_[0] = sourceValues_[0];
         break;
       }
@@ -5795,23 +5798,23 @@ void Instruction::execute() {
         break;
       }
       case Opcode::AArch64_UMAXVv16i8v: {  // umaxv bd, vn.16b
-        results_[0] = sveUMaxV<uint8_t, 16>(sourceValues_);
+        results_[0] = vecUMaxV<uint8_t, 16>(sourceValues_);
         break;
       }
       case Opcode::AArch64_UMAXVv4i16v: {  // umaxv hd, vn.4h
-        results_[0] = sveUMaxV<uint16_t, 4>(sourceValues_);
+        results_[0] = vecUMaxV<uint16_t, 4>(sourceValues_);
         break;
       }
       case Opcode::AArch64_UMAXVv4i32v: {  // umaxv sd, vn.4s
-        results_[0] = sveUMaxV<uint32_t, 4>(sourceValues_);
+        results_[0] = vecUMaxV<uint32_t, 4>(sourceValues_);
         break;
       }
       case Opcode::AArch64_UMAXVv8i16v: {  // umaxv hd, vn.8h
-        results_[0] = sveUMaxV<uint16_t, 8>(sourceValues_);
+        results_[0] = vecUMaxV<uint16_t, 8>(sourceValues_);
         break;
       }
       case Opcode::AArch64_UMAXVv8i8v: {  // umaxv bd, vn.8b
-        results_[0] = sveUMaxV<uint8_t, 8>(sourceValues_);
+        results_[0] = vecUMaxV<uint8_t, 8>(sourceValues_);
         break;
       }
       case Opcode::AArch64_UMOVvi32_idx0:  // umov wd, vn.s[0]

diff --git a/test/regression/aarch64/instructions/load.cc b/test/regression/aarch64/instructions/load.cc
@@ -732,32 +732,7 @@ TEST_P(InstLoad, ldaxrb) {
   EXPECT_EQ(getGeneralRegister<uint32_t>(7), 0x34);
   EXPECT_EQ(getGeneralRegister<uint32_t>(8), 0x12);
 
-  RUN_AARCH64(R"(
-    sub sp, sp, #1024
-    mov w0, #16
-    mov w1, #32
-    mov w2, #48
-    mov w3, #64
-    str w0, [sp], #32
-    str w1, [sp], #32
-    str w2, [sp], #32
-    str w3, [sp], #32
-    sub sp, sp, #128
-    ldaxrb w4, [sp]
-    add sp, sp, #32
-    ldaxrb w5, [sp]
-    add sp, sp, #32
-    ldaxrb w6, [sp]
-    add sp, sp, #32
-    ldaxrb w7, [sp]
-  )");
-
-  EXPECT_EQ(getGeneralRegister<uint32_t>(4), 16);
-  EXPECT_EQ(getGeneralRegister<uint32_t>(5), 32);
-  EXPECT_EQ(getGeneralRegister<uint32_t>(6), 48);
-  EXPECT_EQ(getGeneralRegister<uint32_t>(7), 64);
-
-  EXPECT_GROUP(R"(ldaxrb w7, [sp])", LOAD_INT);
+  EXPECT_GROUP(R"(ldaxrb w8, [x0])", LOAD_INT);
 }
 
 TEST_P(InstLoad, ldrb) {