From e3433ad10570cc9d37448b72d0adf1c8ba917c26 Mon Sep 17 00:00:00 2001 From: aasaitha Date: Fri, 20 Mar 2026 21:04:41 +0000 Subject: [PATCH 1/3] Add support to generate PPR data within the BMC disk Description Add a feature to generate PPR data, both runtime and boot-time, on the BMC alongside the existing CPERs. Changes 1. Add a new source file to handle PPR generation. 2. Add a meson change to build the new PPR source file. 3. Intercept the DumpData from McaErrorInfo and, before the CPER is created and the dump is deleted, parse it to construct the PPRs. 4. The PPR filename convention follows the related CPER file name. Testing Validated internally using signal-based error injection, confirming that both the CPER files and their corresponding PPR files (*_rtppr.json and *_btppr.json) are generated correctly for correctable and deferred errors. Signed-off-by: aasaitha --- include/oem_cper.hpp | 1 + include/utils/ppr_json.hpp | 65 +++++++++ meson.build | 1 + src/apml_manager.cpp | 33 +++++ src/utils/ppr_json.cpp | 268 +++++++++++++++++++++++++++++++++++++ 5 files changed, 368 insertions(+) create mode 100644 include/utils/ppr_json.hpp create mode 100644 src/utils/ppr_json.cpp diff --git a/include/oem_cper.hpp b/include/oem_cper.hpp index d81fa94..a1df705 100644 --- a/include/oem_cper.hpp +++ b/include/oem_cper.hpp @@ -4,6 +4,7 @@ extern "C" { #include "libcper/Cper.h" } +#include constexpr uint8_t mcaDataBankLen = 128; constexpr uint16_t debugDumpDataLen = 12124; diff --git a/include/utils/ppr_json.hpp b/include/utils/ppr_json.hpp new file mode 100644 index 0000000..df01470 --- /dev/null +++ b/include/utils/ppr_json.hpp @@ -0,0 +1,65 @@ +#pragma once + +#include "oem_cper.hpp" + +#include +#include +#include + +namespace amd +{ +namespace ras +{ +namespace util +{ +namespace ppr +{ + +// UMC bank identification from MCA_IPID +// hardware_id = (MCA_IPID >> 32) & 0xFFF => bits[43:32] of 64-bit IPID +// mca_type = MCA_IPID & 0xFFF => bits[11: 0] +constexpr 
uint32_t umcHardwareId = 0x96; +constexpr uint32_t umcMcaType = 0x00; + +// PPR trigger ErrorCodeExt values from MCA_STATUS[21:16] +constexpr uint32_t errCodeDramEcc = 0x00; // DramEccErr +constexpr uint32_t errCodeEcsRow = 0x08; // EcsRowErr + +constexpr uint32_t pprRepairTypeRtSoft = 0x0000; // RUNTIME_SOFT +constexpr uint32_t pprRepairTypeBtSoft = 0x8000; // BOOTTIME_SOFT + +// mcaErr offsets -> word index: +// MCA_STATUS_LO offset 0x08 -> DumpData[2] +// MCA_STATUS_HI offset 0x0C -> DumpData[3] +// MCA_ADDR_LO offset 0x10 -> DumpData[4] +// MCA_ADDR_HI offset 0x14 -> DumpData[5] +// MCA_IPID_LO offset 0x28 -> DumpData[10] +// MCA_IPID_HI offset 0x2C -> DumpData[11] +// MCA_SYND_LO offset 0x30 -> DumpData[12] +// +// dramCeccErr adds baseOffset=4, so indices shift down by 1. +// Use isDram=true to apply the -1 shift. + +/** @brief Scan ptr->McaErrorInfo[0..sectionCount-1] and if PPR needed + * write *_rtppr.json and/or *_btppr.json files to RAS_DIR. + * + * + * @param[in] ptr Shared pointer to the MCA or DRAM runtime CPER + * record. + * @param[in] sectionStart Index of the first section to scan. + * @param[in] sectionCount Number of sections to scan from sectionStart. + * @param[in] socNum Socket number. + * @param[in] errCount Current error file counter. + * @param[in] node Node string for filename prefix. 
+ * @param[in] isDram true → dramCeccErr + * false → mcaErr + */ +void generatePprJsonFiles(const std::shared_ptr& ptr, + uint16_t sectionStart, uint16_t sectionCount, + uint8_t socNum, size_t errCount, + const std::string& node, bool isDram); + +} // namespace ppr +} // namespace util +} // namespace ras +} // namespace amd diff --git a/meson.build b/meson.build index 76f394b..ef331b4 100644 --- a/meson.build +++ b/meson.build @@ -67,6 +67,7 @@ sources = [ 'src/apml_manager.cpp', 'src/utils/util.cpp', 'src/utils/cper.cpp', + 'src/utils/ppr_json.cpp', ] if apml diff --git a/src/apml_manager.cpp b/src/apml_manager.cpp index e4b2dba..b13c0e7 100644 --- a/src/apml_manager.cpp +++ b/src/apml_manager.cpp @@ -3,6 +3,7 @@ #include "config_manager.hpp" #include "oem_cper.hpp" #include "utils/cper.hpp" +#include "utils/ppr_json.hpp" #include "utils/util.hpp" extern "C" @@ -768,6 +769,22 @@ void Manager::harvestRuntimeErrors(uint8_t errorPollingType, amd::ras::util::cper::dumpErrorDescriptor( mcaPtr, sectionCount, runtimeMcaErr, severity, progId); + // Generate RTPPR / BTPPR JSON files from the in-memory MCA register + if (p0Inst.number_of_inst != 0) + { + amd::ras::util::ppr::generatePprJsonFiles( + mcaPtr, 0, p0Inst.number_of_inst, + static_cast(socIndex[0]), errCount, node, false); + } + if (p1Inst.number_of_inst != 0) + { + amd::ras::util::ppr::generatePprJsonFiles( + mcaPtr, + static_cast(sectionCount - p1Inst.number_of_inst), + p1Inst.number_of_inst, + static_cast(socIndex[1]), errCount, node, false); + } + amd::ras::util::cper::createFile(mcaPtr, runtimeMcaErr, sectionCount, errCount, node); @@ -826,6 +843,22 @@ void Manager::harvestRuntimeErrors(uint8_t errorPollingType, amd::ras::util::cper::dumpErrorDescriptor( dramPtr, sectionCount, runtimeDramErr, severity, progId); + // Generate RTPPR / BTPPR JSON files from the in-memory DRAM CECC + if (p0Inst.number_of_inst != 0) + { + amd::ras::util::ppr::generatePprJsonFiles( + dramPtr, 0, p0Inst.number_of_inst, + 
static_cast(socIndex[0]), errCount, node, true); + } + if (p1Inst.number_of_inst != 0) + { + amd::ras::util::ppr::generatePprJsonFiles( + dramPtr, + static_cast(sectionCount - p1Inst.number_of_inst), + p1Inst.number_of_inst, + static_cast(socIndex[1]), errCount, node, true); + } + amd::ras::util::cper::createFile(dramPtr, runtimeDramErr, sectionCount, errCount, node); diff --git a/src/utils/ppr_json.cpp b/src/utils/ppr_json.cpp new file mode 100644 index 0000000..43b06c8 --- /dev/null +++ b/src/utils/ppr_json.cpp @@ -0,0 +1,268 @@ +#include "utils/ppr_json.hpp" + +#include +#include + +#include + +#include +#include + +namespace amd +{ +namespace ras +{ +namespace util +{ +namespace ppr +{ + +// repair entry struct +struct DpprclRepairEntry +{ + uint8_t DeviceTypeToRepair{0}; // 3 bits + uint8_t Bank{0}; // 5 bits BG[4:2] | BA[1:0] + uint8_t Device{0}; // 5 bits failed device index + uint8_t ChipSelect{0}; // 2 bits CS + uint16_t Column{0}; // 11 bits + uint8_t TargetDevice{0}; // 5 bits + uint8_t Valid{0}; // 1 bit + uint32_t Row{0}; // 18 bits + uint8_t RankMultiplier{0}; // 3 bits + uint8_t Channel{0}; // 4 bits UMC channel (IPID[23:20]) + uint8_t SubChannel{0}; // 1 bit sub-channel (SYND[3]) + uint8_t HardPPRDone{0}; // 1 bit (set by CPU after hPPR) + uint8_t PPRUndo{0}; // 1 bit + uint8_t PPRLock{0}; // 1 bit + uint8_t Socket{0}; // 3 bits processor socket number + uint8_t RepairType{0}; // 3 bits inner type (0=soft) + uint8_t ErrorCause{0}; // 3 bits 1=corrected, 3=deferred + uint8_t Reserved1{0}; // 2 bits + uint8_t RepairResult{0}; // 8 bits filled with status after repair + uint16_t Reserved2{0}; // 16 bits + uint32_t AddressLo{0}; // 32 bits physical address [31:0] + uint32_t AddressHi{0}; // 32 bits physical address [63:32] +}; + +// Payload packing +// Payload[0] = bits[ 15: 0] +// Payload[1] = bits[ 31: 16] +// Payload[2] = bits[ 47: 32] +// Payload[3] = bits[ 63: 48] +// Payload[4] = bits[ 79: 64] +// Payload[5] = bits[ 95: 80] +// Payload[6] = 
bits[111: 96] +// Payload[7] = bits[127:112] +// Payload[8] = bits[143:128] +// Payload[9] = bits[159:144] + +static void packPayload(const DpprclRepairEntry& e, uint16_t payload[10]) +{ + payload[0] = static_cast( + ((e.Column & 0x1U) << 15) | + ((e.ChipSelect & 0x3U) << 13) | + ((e.Device & 0x1FU) << 8) | + ((e.Bank & 0x1FU) << 3) | + (e.DeviceTypeToRepair & 0x7U)); + + payload[1] = static_cast( + ((e.Valid & 0x1U) << 15) | + ((e.TargetDevice & 0x1FU) << 10) | + ((e.Column >> 1U) & 0x3FFU)); + + payload[2] = static_cast(e.Row & 0xFFFFU); + + payload[3] = static_cast( + ((e.Socket & 0x7U) << 13) | + ((e.PPRLock & 0x1U) << 12) | + ((e.PPRUndo & 0x1U) << 11) | + ((e.HardPPRDone & 0x1U) << 10) | + ((e.SubChannel & 0x1U) << 9) | + ((e.Channel & 0xFU) << 5) | + ((e.RankMultiplier & 0x7U) << 2) | + ((e.Row >> 16U) & 0x3U)); + + payload[4] = static_cast( + ((e.RepairResult & 0xFFU) << 8) | + ((e.Reserved1 & 0x3U) << 6) | + ((e.ErrorCause & 0x7U) << 3) | + (e.RepairType & 0x7U)); + + payload[5] = static_cast(e.Reserved2 & 0xFFFFU); + + payload[6] = static_cast( e.AddressLo & 0xFFFFU); + payload[7] = static_cast((e.AddressLo >> 16) & 0xFFFFU); + payload[8] = static_cast( e.AddressHi & 0xFFFFU); + payload[9] = static_cast((e.AddressHi >> 16) & 0xFFFFU); +} + +// Create PPR FileName +static std::string buildPprPath(size_t errCount, const std::string& node, + bool isDram, bool isBt) +{ + std::string name = + (isDram ? "dram-runtime-" : "mca-runtime-") + + std::string("ras-error") + std::to_string(errCount) + + (isBt ? 
"_btppr.json" : "_rtppr.json"); + + if (node == "1" || node == "2") + { + name = "node" + node + "-" + name; + } + + return std::string(RAS_DIR) + name; +} + +// Write PPR json entries +static void writeJson(const std::string& path, uint32_t repairType, + uint8_t socNum, const uint16_t payload[10]) +{ + nlohmann::json j; + j["pprDataIn"] = nlohmann::json::array(); + + nlohmann::json entry; + entry["RepairType"] = repairType; + entry["RepairEntryNum"] = 0; + entry["SocNum"] = static_cast(socNum); + entry["Payload"] = nlohmann::json::array(); + + for (int i = 0; i < 10; ++i) + { + entry["Payload"].push_back(static_cast(payload[i])); + } + j["pprDataIn"].push_back(entry); + + std::ofstream ofs(path); + if (!ofs.is_open()) + { + lg2::error("PPR JSON: failed to open {PATH} for writing", "PATH", path); + return; + } + ofs << j.dump(4); + lg2::info("PPR JSON written: {PATH}", "PATH", path); +} + +// Function body to generatePPR while harvesting +void generatePprJsonFiles(const std::shared_ptr& ptr, + uint16_t sectionStart, uint16_t sectionCount, + uint8_t socNum, size_t errCount, + const std::string& node, bool isDram) +{ + if (!ptr || !ptr->McaErrorInfo) + { + return; + } + + // wOff = -1 for dramCeccErr, 0 for mcaErr. + const int wOff = isDram ? 
-1 : 0; + + // Baseline word indices for mcaErr path (baseOffset = 0): + constexpr int kStatusLo = 2; // offset 0x08 + constexpr int kStatusHi = 3; // offset 0x0C + constexpr int kAddrLo = 4; // offset 0x10 (MCA_ADDR) + constexpr int kAddrHi = 5; // offset 0x14 + constexpr int kIpidLo = 10; // offset 0x28 (MCA_IPID) + constexpr int kIpidHi = 11; // offset 0x2C + constexpr int kSyndLo = 12; // offset 0x30 (MCA_SYND) + + for (uint16_t s = sectionStart; s < sectionStart + sectionCount; ++s) + { + uint32_t dumpBuf[length32]; + memcpy(dumpBuf, ptr->McaErrorInfo[s].DumpData, sizeof(dumpBuf)); + const uint32_t* d = dumpBuf; + + // Extract MCA registers + const uint64_t mcaStatus = + (static_cast(d[kStatusHi + wOff]) << 32) | + d[kStatusLo + wOff]; + + const uint64_t mcaAddr = + (static_cast(d[kAddrHi + wOff]) << 32) | + d[kAddrLo + wOff]; + + const uint64_t mcaIpid = + (static_cast(d[kIpidHi + wOff]) << 32) | + d[kIpidLo + wOff]; + + const uint32_t mcaSynd = d[kSyndLo + wOff]; + + // UMC bank detection + const uint32_t hwId = static_cast((mcaIpid >> 32) & 0xFFFU); + const uint32_t mcaType = static_cast( mcaIpid & 0xFFFU); + + if (hwId != umcHardwareId || mcaType != umcMcaType) + { + continue; // Not a UMC bank no need for PPRs + } + + // Severity + const bool uc = static_cast((mcaStatus >> 61) & 0x1U); + const bool deferred = static_cast((mcaStatus >> 44) & 0x1U); + const bool corrected = !uc && !deferred; + const bool deferOnly = !uc && deferred; + + if (!corrected && !deferOnly) + { + continue; + } + + // ErrorCodeExt + const uint32_t ece = static_cast((mcaStatus >> 16) & 0x3FU); + + if (ece != errCodeDramEcc && ece != errCodeEcsRow) + { + continue; + } + + // Fill PPR entry + DpprclRepairEntry e{}; + e.Valid = 1U; + e.Socket = socNum & 0x7U; + e.Channel = static_cast((mcaIpid >> 20) & 0xFU); + e.ChipSelect = static_cast( mcaSynd & 0x7U); + e.SubChannel = static_cast((mcaSynd >> 3) & 0x1U); + e.ErrorCause = corrected ? 
1U : 3U; + e.Device = 0x1FU; + + if (ece == errCodeDramEcc) + { + e.Row = static_cast( mcaAddr & 0x3FFFFU); + e.Bank = static_cast ((mcaAddr >> 18) & 0x1FU); + e.Column = 0; + e.AddressLo = static_cast((mcaAddr >> 4) & 0xFFFF'FFFFU); + e.AddressHi = static_cast((mcaAddr >> 36) & 0xFU); + } + else + { + e.Row = static_cast( mcaAddr & 0x3FFFFU); + e.Bank = static_cast ((mcaAddr >> 18) & 0x1FU); + e.Column = 0; + } + + // RT payload + e.RepairType = 0U; + uint16_t rtPayload[10]{}; + packPayload(e, rtPayload); + + // Pack BT payload + DpprclRepairEntry eBt = e; + eBt.AddressLo = 0U; + eBt.AddressHi = 0U; + uint16_t btPayload[10]{}; + packPayload(eBt, btPayload); + + if (corrected) + { + const std::string rtPath = buildPprPath(errCount, node, isDram, false); + writeJson(rtPath, pprRepairTypeRtSoft, socNum, rtPayload); + } + + const std::string btPath = buildPprPath(errCount, node, isDram, true); + writeJson(btPath, pprRepairTypeBtSoft, socNum, btPayload); + } +} + +} // namespace ppr +} // namespace util +} // namespace ras +} // namespace amd From f1466a9a00661bf5062ffb5d79b89be4caf5ec08 Mon Sep 17 00:00:00 2001 From: aasaitha Date: Fri, 20 Mar 2026 23:30:41 +0000 Subject: [PATCH 2/3] Fix the shift level in the register parsing Signed-off-by: aasaitha --- include/utils/ppr_json.hpp | 9 +++++---- src/utils/ppr_json.cpp | 16 ++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/utils/ppr_json.hpp b/include/utils/ppr_json.hpp index df01470..e5ef5e3 100644 --- a/include/utils/ppr_json.hpp +++ b/include/utils/ppr_json.hpp @@ -16,8 +16,8 @@ namespace ppr { // UMC bank identification from MCA_IPID -// hardware_id = (MCA_IPID >> 32) & 0xFFF => bits[43:32] of 64-bit IPID -// mca_type = MCA_IPID & 0xFFF => bits[11: 0] +// hardware_id = (MCA_IPID >> 32) & 0xFFF => bits[43:32] of 64-bit IPID +// mca_type = (MCA_IPID >> 48) & 0xFFFF => bits[63:48] of 64-bit IPID constexpr uint32_t umcHardwareId = 0x96; constexpr uint32_t umcMcaType = 0x00; @@ 
-25,8 +25,9 @@ constexpr uint32_t umcMcaType = 0x00; constexpr uint32_t errCodeDramEcc = 0x00; // DramEccErr constexpr uint32_t errCodeEcsRow = 0x08; // EcsRowErr -constexpr uint32_t pprRepairTypeRtSoft = 0x0000; // RUNTIME_SOFT -constexpr uint32_t pprRepairTypeBtSoft = 0x8000; // BOOTTIME_SOFT + +constexpr uint32_t pprRepairTypeRtSoft = 0x0000; // RUNTIME_SOFT +constexpr uint32_t pprRepairTypeBtSoft = 0x8000; // BOOTTIME_SOFT // mcaErr offsets -> word index: // MCA_STATUS_LO offset 0x08 -> DumpData[2] diff --git a/src/utils/ppr_json.cpp b/src/utils/ppr_json.cpp index 43b06c8..57485c4 100644 --- a/src/utils/ppr_json.cpp +++ b/src/utils/ppr_json.cpp @@ -44,7 +44,7 @@ struct DpprclRepairEntry uint32_t AddressHi{0}; // 32 bits physical address [63:32] }; -// Payload packing +// Payload packing // Payload[0] = bits[ 15: 0] // Payload[1] = bits[ 31: 16] // Payload[2] = bits[ 47: 32] @@ -150,6 +150,7 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, { if (!ptr || !ptr->McaErrorInfo) { + lg2::error("PPR JSON: generatePprJsonFiles - null ptr or McaErrorInfo, skipping"); return; } @@ -188,7 +189,7 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, // UMC bank detection const uint32_t hwId = static_cast((mcaIpid >> 32) & 0xFFFU); - const uint32_t mcaType = static_cast( mcaIpid & 0xFFFU); + const uint32_t mcaType = static_cast((mcaIpid >> 48) & 0xFFFFU); if (hwId != umcHardwareId || mcaType != umcMcaType) { @@ -218,14 +219,14 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, DpprclRepairEntry e{}; e.Valid = 1U; e.Socket = socNum & 0x7U; - e.Channel = static_cast((mcaIpid >> 20) & 0xFU); - e.ChipSelect = static_cast( mcaSynd & 0x7U); - e.SubChannel = static_cast((mcaSynd >> 3) & 0x1U); + e.Channel = static_cast(((mcaIpid >> 20) & 0xFU) / 2U); e.ErrorCause = corrected ? 
1U : 3U; e.Device = 0x1FU; if (ece == errCodeDramEcc) { + e.ChipSelect = static_cast( mcaSynd & 0x7U); + e.SubChannel = static_cast((mcaSynd >> 4) & 0x1U); e.Row = static_cast( mcaAddr & 0x3FFFFU); e.Bank = static_cast ((mcaAddr >> 18) & 0x1FU); e.Column = 0; @@ -234,12 +235,11 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, } else { - e.Row = static_cast( mcaAddr & 0x3FFFFU); - e.Bank = static_cast ((mcaAddr >> 18) & 0x1FU); + e.Bank = static_cast((mcaAddr >> 18) & 0x1FU); e.Column = 0; } - // RT payload + // Pack RT payload e.RepairType = 0U; uint16_t rtPayload[10]{}; packPayload(e, rtPayload); From 39a87ccb9c986e1b97ae9b059ceb28758c8aa8b3 Mon Sep 17 00:00:00 2001 From: aasaitha Date: Sun, 22 Mar 2026 22:34:07 +0000 Subject: [PATCH 3/3] Add the Address translation bits Signed-off-by: aasaitha --- include/utils/ppr_json.hpp | 7 ++-- src/utils/ppr_json.cpp | 72 +++++++++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/include/utils/ppr_json.hpp b/include/utils/ppr_json.hpp index e5ef5e3..69e2247 100644 --- a/include/utils/ppr_json.hpp +++ b/include/utils/ppr_json.hpp @@ -25,9 +25,8 @@ constexpr uint32_t umcMcaType = 0x00; constexpr uint32_t errCodeDramEcc = 0x00; // DramEccErr constexpr uint32_t errCodeEcsRow = 0x08; // EcsRowErr - -constexpr uint32_t pprRepairTypeRtSoft = 0x0000; // RUNTIME_SOFT -constexpr uint32_t pprRepairTypeBtSoft = 0x8000; // BOOTTIME_SOFT +constexpr uint32_t pprRepairTypeRtSoft = 0x0000; +constexpr uint32_t pprRepairTypeBtSoft = 0x8000; // mcaErr offsets -> word index: // MCA_STATUS_LO offset 0x08 -> DumpData[2] @@ -37,6 +36,8 @@ constexpr uint32_t pprRepairTypeBtSoft = 0x8000; // BOOTTIME_SOFT // MCA_IPID_LO offset 0x28 -> DumpData[10] // MCA_IPID_HI offset 0x2C -> DumpData[11] // MCA_SYND_LO offset 0x30 -> DumpData[12] +// TRANS_ADDR_LO offset 0x70 -> DumpData[28] +// TRANS_ADDR_HI offset 0x74 -> DumpData[29] // // dramCeccErr adds baseOffset=4, so indices shift down by 1. 
// Use isDram=true to apply the -1 shift. diff --git a/src/utils/ppr_json.cpp b/src/utils/ppr_json.cpp index 57485c4..1e528bc 100644 --- a/src/utils/ppr_json.cpp +++ b/src/utils/ppr_json.cpp @@ -29,8 +29,8 @@ struct DpprclRepairEntry uint8_t Valid{0}; // 1 bit uint32_t Row{0}; // 18 bits uint8_t RankMultiplier{0}; // 3 bits - uint8_t Channel{0}; // 4 bits UMC channel (IPID[23:20]) - uint8_t SubChannel{0}; // 1 bit sub-channel (SYND[3]) + uint8_t Channel{0}; // 4 bits UMC channel + uint8_t SubChannel{0}; // 1 bit sub-channel uint8_t HardPPRDone{0}; // 1 bit (set by CPU after hPPR) uint8_t PPRUndo{0}; // 1 bit uint8_t PPRLock{0}; // 1 bit @@ -45,6 +45,7 @@ struct DpprclRepairEntry }; // Payload packing +// // Payload[0] = bits[ 15: 0] // Payload[1] = bits[ 31: 16] // Payload[2] = bits[ 47: 32] @@ -150,21 +151,23 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, { if (!ptr || !ptr->McaErrorInfo) { - lg2::error("PPR JSON: generatePprJsonFiles - null ptr or McaErrorInfo, skipping"); + lg2::error("PPR JSON: null ptr or McaErrorInfo, skipping"); return; } // wOff = -1 for dramCeccErr, 0 for mcaErr. const int wOff = isDram ? 
-1 : 0; - // Baseline word indices for mcaErr path (baseOffset = 0): - constexpr int kStatusLo = 2; // offset 0x08 - constexpr int kStatusHi = 3; // offset 0x0C - constexpr int kAddrLo = 4; // offset 0x10 (MCA_ADDR) - constexpr int kAddrHi = 5; // offset 0x14 - constexpr int kIpidLo = 10; // offset 0x28 (MCA_IPID) - constexpr int kIpidHi = 11; // offset 0x2C - constexpr int kSyndLo = 12; // offset 0x30 (MCA_SYND) + // Baseline word indices for mcaErr path (baseOffset=0): + constexpr int kStatusLo = 2; // offset 0x08 (MCA_STATUS_LO) + constexpr int kStatusHi = 3; // offset 0x0C (MCA_STATUS_HI) + constexpr int kAddrLo = 4; // offset 0x10 (MCA_ADDR_LO) + constexpr int kAddrHi = 5; // offset 0x14 (MCA_ADDR_HI) + constexpr int kIpidLo = 10; // offset 0x28 (MCA_IPID_LO) + constexpr int kIpidHi = 11; // offset 0x2C (MCA_IPID_HI) + constexpr int kSyndLo = 12; // offset 0x30 (MCA_SYND_LO) + constexpr int kTransAddrLo = 28; // offset 0x70 (TRANS_ADDR_LO) + constexpr int kTransAddrHi = 29; // offset 0x74 (TRANS_ADDR_HI) for (uint16_t s = sectionStart; s < sectionStart + sectionCount; ++s) { @@ -187,6 +190,11 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, const uint32_t mcaSynd = d[kSyndLo + wOff]; + const uint64_t transAddr = + (static_cast(d[kTransAddrHi]) << 32) | + d[kTransAddrLo]; + const bool transAddrValid = static_cast((transAddr >> 62) & 0x1U); + // UMC bank detection const uint32_t hwId = static_cast((mcaIpid >> 32) & 0xFFFU); const uint32_t mcaType = static_cast((mcaIpid >> 48) & 0xFFFFU); @@ -215,28 +223,50 @@ void generatePprJsonFiles(const std::shared_ptr& ptr, continue; } - // Fill PPR entry + // Fill repair entry DpprclRepairEntry e{}; e.Valid = 1U; - e.Socket = socNum & 0x7U; - e.Channel = static_cast(((mcaIpid >> 20) & 0xFU) / 2U); - e.ErrorCause = corrected ? 1U : 3U; - e.Device = 0x1FU; + e.ErrorCause = corrected ? 
1U : 3U; // 1=corrected, 3=deferred + // Python: Device = transaddr >> 42 & 0x1f when deferred (no valid-bit gate; + // when transaddr=0, this naturally gives 0). + // Device = 0x1F always for corrected errors. + e.Device = deferOnly + ? static_cast((transAddr >> 42) & 0x1FU) + : 0x1FU; + + const uint8_t iod = static_cast((mcaIpid >> 44) & 0xFU); + const uint8_t umcN = static_cast((mcaIpid >> 20) & 0xFU); if (ece == errCodeDramEcc) { e.ChipSelect = static_cast( mcaSynd & 0x7U); e.SubChannel = static_cast((mcaSynd >> 4) & 0x1U); - e.Row = static_cast( mcaAddr & 0x3FFFFU); - e.Bank = static_cast ((mcaAddr >> 18) & 0x1FU); - e.Column = 0; + + if (transAddrValid) + { + e.Bank = static_cast ((transAddr >> 2) & 0x1FU); + e.Column = static_cast((transAddr >> 25) & 0x7FFU); + e.Row = static_cast((transAddr >> 7) & 0x3FFFFU); + e.RankMultiplier = static_cast ((transAddr >> 36) & 0x7U); + e.Channel = static_cast (umcN / 2U + iod * 8U); + e.Socket = static_cast ((transAddr >> 58) & 0x7U); + } e.AddressLo = static_cast((mcaAddr >> 4) & 0xFFFF'FFFFU); e.AddressHi = static_cast((mcaAddr >> 36) & 0xFU); } else { - e.Bank = static_cast((mcaAddr >> 18) & 0x1FU); - e.Column = 0; + e.Bank = static_cast((mcaAddr >> 18) & 0x1FU); + e.Column = 0; + e.Channel = static_cast(umcN / 2U + iod * 8U); + e.Socket = static_cast((transAddr >> 58) & 0x7U); + + if (transAddrValid) + { + e.RankMultiplier = static_cast ((transAddr >> 36) & 0x7U); + e.AddressLo = static_cast( transAddr & 0xFFFF'FFFFU); + e.AddressHi = static_cast((transAddr >> 32) & 0xFFU); + } } // Pack RT payload