Skip to content

Commit

Permalink
Responded to PR comments. Cleaned up a lot of helper functions and fi…
Browse files Browse the repository at this point in the history
…xed a few metadata issues
  • Loading branch information
JosephMoore25 committed Dec 18, 2024
1 parent f643681 commit 7b07570
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 192 deletions.
29 changes: 29 additions & 0 deletions src/include/simeng/arch/aarch64/helpers/neon.hh
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,35 @@ RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues,
return {out, 256};
}

/** Helper function for NEON instructions with the format `uaddlv Vd, Vn.T`.
 * T represents the type used to accumulate and return the sum (e.g. for
 * `uaddlv hd, vn.8b`, T = uint32_t).
 * U represents the element type of sourceValues[0] (e.g. for vn.8b, U =
 * uint8_t).
 * I represents the number of leading elements of sourceValues[0] to sum.
 * Returns correctly formatted RegisterValue. */
template <typename T, typename U, int I>
RegisterValue vecAddlv(srcValContainer& sourceValues) {
  const U* elems = sourceValues[0].getAsVector<U>();
  // Accumulate in the wider type T so the narrow U elements are widened
  // before being added.
  T sum = 0;
  int idx = 0;
  while (idx < I) {
    sum += elems[idx];
    ++idx;
  }
  return {sum, 256};
}

/** Helper function for NEON instructions with the format `umaxv Vd, Vn.T`.
 * T represents the element type of sourceValues[0] (e.g. for vn.s, T =
 * uint32_t).
 * I represents the number of leading elements of sourceValues[0] to reduce.
 * Returns correctly formatted RegisterValue. */
template <typename T, int I>
RegisterValue vecUMaxV(srcValContainer& sourceValues) {
  const T* lanes = sourceValues[0].getAsVector<T>();
  // Seed the reduction with the first lane, then fold in the remaining ones.
  T largest = lanes[0];
  const T* const end = lanes + I;
  for (const T* lane = lanes + 1; lane < end; ++lane) {
    largest = std::max(*lane, largest);
  }
  return {largest, 256};
}

/** Helper function for NEON instructions with the format `umaxp vd, vn, vm`.
* T represents the type of sourceValues (e.g. for vn.2d, T = uint64_t).
* I represents the number of elements in the output array to be updated (e.g.
Expand Down
88 changes: 28 additions & 60 deletions src/include/simeng/arch/aarch64/helpers/sve.hh
Original file line number Diff line number Diff line change
Expand Up @@ -114,33 +114,6 @@ RegisterValue sveAddvPredicated(srcValContainer& sourceValues,
return {out, 256};
}

/** Helper function for NEON instructions with the format `uaddlv Vd, Vn.T`.
 * T represents the type used to accumulate and return the sum (e.g. for
 * `uaddlv hd, vn.8b`, T = uint32_t).
 * U represents the element type of sourceValues[0] (e.g. for v0.8b, U =
 * uint8_t).
 * I represents the number of leading elements of sourceValues[0] to sum.
 * Returns correctly formatted RegisterValue. */
template <typename T, typename U, int I>
RegisterValue sveAddlv(srcValContainer& sourceValues) {
  const U* src = sourceValues[0].getAsVector<U>();
  T total = 0;
  // Walk the first I source elements in order, widening each into T.
  for (int lane = 0; lane < I; ++lane) {
    const U element = src[lane];
    total += element;
  }
  return {total, 256};
}

/** Helper function for NEON instructions with the format `umaxv Vd, Vn.T`.
 * T represents the element type of sourceValues[0] (e.g. for vn.s, T =
 * uint32_t).
 * I represents the number of leading elements of sourceValues[0] to reduce.
 * Returns correctly formatted RegisterValue. */
template <typename T, int I>
RegisterValue sveUMaxV(srcValContainer& sourceValues) {
  const T* src = sourceValues[0].getAsVector<T>();
  // Start from element 0 and compare against each subsequent element.
  T best = src[0];
  int lane = 1;
  while (lane < I) {
    best = std::max(src[lane], best);
    ++lane;
  }
  return {best, 256};
}

/** Helper function for SVE instructions with the format `adr zd, [zn, zm{,
* lsl #<1,2,3>}]`.
* T represents the type of sourceValues (e.g. for zn.d, T = uint64_t).
Expand Down Expand Up @@ -284,7 +257,7 @@ RegisterValue sveCpy_imm(
return {out, 256};
}

/** Helper function for SVE instructions with the format `cpy zd, pg/m, vn`.
/** Helper function for SVE instructions with the format `cpy zd, pg/m, rn`.
* T represents the type of sourceValues (e.g. for zd.d, T = int64_t).
* Returns correctly formatted RegisterValue. */
template <typename T>
Expand All @@ -294,15 +267,15 @@ RegisterValue sveCpy_Scalar(
const uint16_t VL_bits) {
const T* zd = sourceValues[0].getAsVector<T>();
const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
const T vn = sourceValues[2].get<T>();
const T rn = sourceValues[2].get<T>();

const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
T out[256 / sizeof(T)] = {0};

for (int i = 0; i < partition_num; i++) {
uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T));
if (p[i / (64 / sizeof(T))] & shifted_active) {
out[i] = vn;
out[i] = rn;
} else {
out[i] = zd[i];
}
Expand Down Expand Up @@ -956,7 +929,8 @@ RegisterValue sveFTrigSSel(srcValContainer& sourceValues,
}

/** Helper function for SVE instructions with the format `ftmad zd, zn, zm,
* #imm`. T represents the type of sourceValues (e.g. for zn.d, T = double).
* #imm`.
* T represents the type of sourceValues (e.g. for zn.d, T = double).
* Returns correctly formatted RegisterValue. **/
template <typename T>
RegisterValue sveFTrigMad(
Expand Down Expand Up @@ -1110,15 +1084,14 @@ RegisterValue sveIndex(
return {out, 256};
}

/** Helper function for SVE instructions with the format `lastb vd, pg, zn`.
/** Helper function for SVE instructions with the format `lastb rd, pg, zn`.
* T represents the vector register type (e.g. zd.d would be uint64_t).
* Returns correctly formatted RegisterValue. */
template <typename T>
RegisterValue sveLastBScalar(srcValContainer& sourceValues,
const uint16_t VL_bits) {
// sourceValues are wrong and the correct value is in the previous index.
const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
const T* n = sourceValues[2].getAsVector<T>();
const uint64_t* p = sourceValues[0].getAsVector<uint64_t>();
const T* n = sourceValues[1].getAsVector<T>();

const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
T out;
Expand All @@ -1139,15 +1112,15 @@ RegisterValue sveLastBScalar(srcValContainer& sourceValues,
return {out, 256};
}

/** Helper function for SVE instructions with the format `clastb vd, pg, vd,
* zn`. T represents the vector register type (e.g. zd.d would be uint64_t).
/** Helper function for SVE instructions with the format `clastb rd, pg, rd,
* zn`.
* T represents the vector register type (e.g. zd.d would be uint64_t).
* Returns correctly formatted RegisterValue. */
template <typename T>
RegisterValue sveCLastBScalar(srcValContainer& sourceValues,
const uint16_t VL_bits) {
// sourceValues are wrong and the correct value is in the previous index.
const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
const uint64_t* m = sourceValues[2].getAsVector<uint64_t>();
const uint64_t m = sourceValues[2].get<T>();
const T* n = sourceValues[3].getAsVector<T>();

const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
Expand All @@ -1164,9 +1137,9 @@ RegisterValue sveCLastBScalar(srcValContainer& sourceValues,
}

if (lastElem < 0) {
out = static_cast<uint64_t>(static_cast<T>(m[0]));
out = m;
} else {
out = static_cast<uint64_t>(static_cast<T>(n[lastElem]));
out = n[lastElem];
}
return {out, 256};
}
Expand Down Expand Up @@ -1504,7 +1477,8 @@ RegisterValue sveOrr_3vecs(srcValContainer& sourceValues,
/** Helper function for SVE2 instructions with the format `psel pd, pn,
* pm.t[wa, #imm]`.
* T represents the type of sourceValues (e.g. for pm.d, T =
* uint64_t). Returns an array of 4 uint64_t elements. */
* uint64_t).
* Returns an array of 4 uint64_t elements. */
template <typename T>
std::array<uint64_t, 4> svePsel(
srcValContainer& sourceValues,
Expand All @@ -1528,12 +1502,13 @@ std::array<uint64_t, 4> svePsel(
return out;
}

/** Helper function for SVE instructions with the format `pfirst pdn, pg, pdn`.
* Returns an array of 4 uint64_t elements. */
std::array<uint64_t, 4> svePfirst(srcValContainer& sourceValues,
const uint16_t VL_bits) {
/** Helper function for SVE instructions with the format `pfirst pdn.b, pg,
* pdn.b`.
* Returns an array of 4 uint64_t elements, and updates the NZCV flags.
*/
std::tuple<std::array<uint64_t, 4>, uint8_t> svePfirst(
srcValContainer& sourceValues, const uint16_t VL_bits) {
const uint16_t partition_num = VL_bits / 8;
// sourceValues are wrong and the correct value is in the previous index.
const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
const uint64_t* dn = sourceValues[2].getAsVector<uint64_t>();
// Set destination d as source n to copy all false lanes and the active lanes
Expand All @@ -1547,10 +1522,11 @@ std::array<uint64_t, 4> svePfirst(srcValContainer& sourceValues,
break;
}
}
return out;
return {out, getNZCVfromPred(out, VL_bits, 1)};
}

/** Helper function for SVE instructions with the format `pnext pdn, pv, pdn`.
* T represents the type of sourceValues (e.g. for pdn.d, T = uint64_t).
* Returns an array of 4 uint64_t elements, and updates the NZCV flags. */
template <typename T>
std::tuple<std::array<uint64_t, 4>, uint8_t> svePnext(
Expand All @@ -1563,21 +1539,13 @@ std::tuple<std::array<uint64_t, 4>, uint8_t> svePnext(
// Set destination elements to 0
std::array<uint64_t, 4> out = {0, 0, 0, 0};

// Get pattern
const uint16_t count =
sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits);

// Exit early if count == 0
if (count == 0) return {out, getNZCVfromPred(out, VL_bits, sizeof(T))};
// Get last active element of dn.pattern
int lastElem = -1;
for (int i = partition_num - 1; i >= 0; i--) {
if (i < count) {
uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T));
if (dn[i / (64 / sizeof(T))] & shifted_active) {
lastElem = i;
break;
}
uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T));
if (dn[i / (64 / sizeof(T))] & shifted_active) {
lastElem = i;
break;
}
}
// Get next active element of p, starting from last of dn.pattern
Expand Down
11 changes: 3 additions & 8 deletions src/lib/arch/aarch64/InstructionMetadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -563,8 +563,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[0].access = CS_AC_READ | CS_AC_WRITE;
operands[1].access = CS_AC_READ;
operands[2].access = CS_AC_READ;
operands[3].access = CS_AC_READ;
operands[3].type = ARM64_OP_IMM;
break;
}
case Opcode::AArch64_FCMLA_ZPmZZ_D: {
Expand All @@ -573,7 +571,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[1].access = CS_AC_READ;
operands[2].access = CS_AC_READ;
operands[3].access = CS_AC_READ;
operands[4].access = CS_AC_READ;
operands[4].type = ARM64_OP_IMM;
break;
}
Expand All @@ -583,7 +580,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[1].access = CS_AC_READ;
operands[2].access = CS_AC_READ;
operands[3].access = CS_AC_READ;
operands[4].access = CS_AC_READ;
operands[4].type = ARM64_OP_IMM;
break;
}
Expand Down Expand Up @@ -1731,8 +1727,6 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[4].access = CS_AC_READ;
operands[5].access = CS_AC_READ;
break;
case Opcode::AArch64_PFIRST_B:
[[fallthrough]];
case Opcode::AArch64_LASTB_VPZ_D:
[[fallthrough]];
case Opcode::AArch64_LASTB_VPZ_S:
Expand All @@ -1743,6 +1737,7 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[0].access = CS_AC_WRITE;
operands[1].access = CS_AC_READ;
operands[2].access = CS_AC_READ;
break;
}
case Opcode::AArch64_CLASTB_VPZ_D:
[[fallthrough]];
Expand All @@ -1757,6 +1752,8 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[3].access = CS_AC_READ;
break;
}
case Opcode::AArch64_PFIRST_B:
[[fallthrough]];
case Opcode::AArch64_PNEXT_D:
[[fallthrough]];
case Opcode::AArch64_PNEXT_S:
Expand Down Expand Up @@ -2535,8 +2532,6 @@ void InstructionMetadata::revertAliasing() {

operands[2].type = ARM64_OP_REG;
operands[2].access = CS_AC_READ;
operands[3].type = ARM64_OP_IMM;
operands[3].access = CS_AC_READ;
return;
}
return aliasNYI();
Expand Down
17 changes: 10 additions & 7 deletions src/lib/arch/aarch64/Instruction_execute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ void Instruction::execute() {
break;
}
case Opcode::AArch64_UADDLVv8i8v: { // uaddlv hd, vn.8b
results_[0] = sveAddlv<uint32_t, uint8_t, 8>(sourceValues_);
results_[0] = vecAddlv<uint32_t, uint8_t, 8>(sourceValues_);
break;
}
case Opcode::AArch64_ADDWri: { // add wd, wn, #imm{, shift}
Expand Down Expand Up @@ -4149,7 +4149,9 @@ void Instruction::execute() {
break;
}
case Opcode::AArch64_PFIRST_B: { // pfirst pdn.b, pg, pdn.b
results_[0] = svePfirst(sourceValues_, VL_bits);
auto [result, nzcv] = svePfirst(sourceValues_, VL_bits);
results_[0] = nzcv;
results_[1] = result;
break;
}
case Opcode::AArch64_PNEXT_B: { // pnext pdn.b, pv, pdn.b
Expand Down Expand Up @@ -5166,6 +5168,7 @@ void Instruction::execute() {
}
case Opcode::AArch64_STLRW: // stlr wt, [xn]
case Opcode::AArch64_STLRX: { // stlr xt, [xn]
// STORE
memoryData_[0] = sourceValues_[0];
break;
}
Expand Down Expand Up @@ -5795,23 +5798,23 @@ void Instruction::execute() {
break;
}
case Opcode::AArch64_UMAXVv16i8v: { // umaxv bd, vn.16b
results_[0] = sveUMaxV<uint8_t, 16>(sourceValues_);
results_[0] = vecUMaxV<uint8_t, 16>(sourceValues_);
break;
}
case Opcode::AArch64_UMAXVv4i16v: { // umaxv hd, vn.4h
results_[0] = sveUMaxV<uint16_t, 4>(sourceValues_);
results_[0] = vecUMaxV<uint16_t, 4>(sourceValues_);
break;
}
case Opcode::AArch64_UMAXVv4i32v: { // umaxv sd, vn.4s
results_[0] = sveUMaxV<uint32_t, 4>(sourceValues_);
results_[0] = vecUMaxV<uint32_t, 4>(sourceValues_);
break;
}
case Opcode::AArch64_UMAXVv8i16v: { // umaxv hd, vn.8h
results_[0] = sveUMaxV<uint16_t, 8>(sourceValues_);
results_[0] = vecUMaxV<uint16_t, 8>(sourceValues_);
break;
}
case Opcode::AArch64_UMAXVv8i8v: { // umaxv bd, vn.8b
results_[0] = sveUMaxV<uint8_t, 8>(sourceValues_);
results_[0] = vecUMaxV<uint8_t, 8>(sourceValues_);
break;
}
case Opcode::AArch64_UMOVvi32_idx0: // umov wd, vn.s[0]
Expand Down
27 changes: 1 addition & 26 deletions test/regression/aarch64/instructions/load.cc
Original file line number Diff line number Diff line change
Expand Up @@ -732,32 +732,7 @@ TEST_P(InstLoad, ldaxrb) {
EXPECT_EQ(getGeneralRegister<uint32_t>(7), 0x34);
EXPECT_EQ(getGeneralRegister<uint32_t>(8), 0x12);

RUN_AARCH64(R"(
sub sp, sp, #1024
mov w0, #16
mov w1, #32
mov w2, #48
mov w3, #64
str w0, [sp], #32
str w1, [sp], #32
str w2, [sp], #32
str w3, [sp], #32
sub sp, sp, #128
ldaxrb w4, [sp]
add sp, sp, #32
ldaxrb w5, [sp]
add sp, sp, #32
ldaxrb w6, [sp]
add sp, sp, #32
ldaxrb w7, [sp]
)");

EXPECT_EQ(getGeneralRegister<uint32_t>(4), 16);
EXPECT_EQ(getGeneralRegister<uint32_t>(5), 32);
EXPECT_EQ(getGeneralRegister<uint32_t>(6), 48);
EXPECT_EQ(getGeneralRegister<uint32_t>(7), 64);

EXPECT_GROUP(R"(ldaxrb w7, [sp])", LOAD_INT);
EXPECT_GROUP(R"(ldaxrb w8, [x0])", LOAD_INT);
}

TEST_P(InstLoad, ldrb) {
Expand Down
Loading

0 comments on commit 7b07570

Please sign in to comment.