Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/oem_cper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ extern "C"
{
#include "libcper/Cper.h"
}
#include <cstddef>

constexpr uint8_t mcaDataBankLen = 128;
constexpr uint16_t debugDumpDataLen = 12124;
Expand Down
67 changes: 67 additions & 0 deletions include/utils/ppr_json.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#pragma once

#include "oem_cper.hpp"

#include <cstdint>
#include <memory>
#include <string>

namespace amd
{
namespace ras
{
namespace util
{
namespace ppr
{

// A UMC bank is recognised by two fields of the 64-bit MCA_IPID register:
//   hardware_id : bits[43:32], i.e. (MCA_IPID >> 32) & 0xFFF
//   mca_type    : bits[63:48], i.e. (MCA_IPID >> 48) & 0xFFFF
constexpr uint32_t umcHardwareId{0x96U};
constexpr uint32_t umcMcaType{0x00U};

// ErrorCodeExt values, read from MCA_STATUS bits[21:16], that trigger PPR.
constexpr uint32_t errCodeDramEcc{0x00U}; // DramEccErr
constexpr uint32_t errCodeEcsRow{0x08U};  // EcsRowErr

// PPR repair-type codes.
// NOTE(review): the Rt/Bt prefixes presumably pair with the *_rtppr.json and
// *_btppr.json output files respectively -- confirm against the PPR spec.
constexpr uint32_t pprRepairTypeRtSoft{0x0000U};
constexpr uint32_t pprRepairTypeBtSoft{0x8000U};

// mcaErr offsets -> word index:
// MCA_STATUS_LO offset 0x08 -> DumpData[2]
// MCA_STATUS_HI offset 0x0C -> DumpData[3]
// MCA_ADDR_LO offset 0x10 -> DumpData[4]
// MCA_ADDR_HI offset 0x14 -> DumpData[5]
// MCA_IPID_LO offset 0x28 -> DumpData[10]
// MCA_IPID_HI offset 0x2C -> DumpData[11]
// MCA_SYND_LO offset 0x30 -> DumpData[12]
// TRANS_ADDR_LO offset 0x70 -> DumpData[28]
// TRANS_ADDR_HI offset 0x74 -> DumpData[29]
//
// dramCeccErr adds baseOffset=4 (one 4-byte word, given index = offset / 4
// in the table above), so every DumpData index listed here shifts down by 1.
// Pass isDram=true to apply that -1 shift.

/** @brief Scan a contiguous range of error sections and, if PPR is needed,
 *         write *_rtppr.json and/or *_btppr.json files to RAS_DIR.
 *
 *  The sections examined are
 *  ptr->McaErrorInfo[sectionStart .. sectionStart + sectionCount - 1],
 *  so a caller can process one socket's slice of a multi-socket record by
 *  passing that slice's first index and length.
 *
 *  @param[in] ptr           Shared pointer to the MCA or DRAM runtime CPER
 *                           record.
 *  @param[in] sectionStart  Index of the first section to scan.
 *  @param[in] sectionCount  Number of sections to scan from sectionStart.
 *  @param[in] socNum        Socket number.
 *  @param[in] errCount      Current error file counter.
 *  @param[in] node          Node string for filename prefix.
 *  @param[in] isDram        true  → dramCeccErr
 *                           false → mcaErr
 */
void generatePprJsonFiles(const std::shared_ptr<McaRuntimeCperRecord>& ptr,
                          uint16_t sectionStart, uint16_t sectionCount,
                          uint8_t socNum, std::size_t errCount,
                          const std::string& node, bool isDram);

} // namespace ppr
} // namespace util
} // namespace ras
} // namespace amd
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ sources = [
'src/apml_manager.cpp',
'src/utils/util.cpp',
'src/utils/cper.cpp',
'src/utils/ppr_json.cpp',
]

if apml
Expand Down
33 changes: 33 additions & 0 deletions src/apml_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "config_manager.hpp"
#include "oem_cper.hpp"
#include "utils/cper.hpp"
#include "utils/ppr_json.hpp"
#include "utils/util.hpp"

extern "C"
Expand Down Expand Up @@ -768,6 +769,22 @@ void Manager::harvestRuntimeErrors(uint8_t errorPollingType,
amd::ras::util::cper::dumpErrorDescriptor(
mcaPtr, sectionCount, runtimeMcaErr, severity, progId);

// Generate RTPPR / BTPPR JSON files from the in-memory MCA register
if (p0Inst.number_of_inst != 0)
{
amd::ras::util::ppr::generatePprJsonFiles(
mcaPtr, 0, p0Inst.number_of_inst,
static_cast<uint8_t>(socIndex[0]), errCount, node, false);
}
if (p1Inst.number_of_inst != 0)
{
amd::ras::util::ppr::generatePprJsonFiles(
mcaPtr,
static_cast<uint16_t>(sectionCount - p1Inst.number_of_inst),
p1Inst.number_of_inst,
static_cast<uint8_t>(socIndex[1]), errCount, node, false);
}

amd::ras::util::cper::createFile(mcaPtr, runtimeMcaErr, sectionCount,
errCount, node);

Expand Down Expand Up @@ -826,6 +843,22 @@ void Manager::harvestRuntimeErrors(uint8_t errorPollingType,
amd::ras::util::cper::dumpErrorDescriptor(
dramPtr, sectionCount, runtimeDramErr, severity, progId);

// Generate RTPPR / BTPPR JSON files from the in-memory DRAM CECC
if (p0Inst.number_of_inst != 0)
{
amd::ras::util::ppr::generatePprJsonFiles(
dramPtr, 0, p0Inst.number_of_inst,
static_cast<uint8_t>(socIndex[0]), errCount, node, true);
}
if (p1Inst.number_of_inst != 0)
{
amd::ras::util::ppr::generatePprJsonFiles(
dramPtr,
static_cast<uint16_t>(sectionCount - p1Inst.number_of_inst),
p1Inst.number_of_inst,
static_cast<uint8_t>(socIndex[1]), errCount, node, true);
}

amd::ras::util::cper::createFile(dramPtr, runtimeDramErr, sectionCount,
errCount, node);

Expand Down
Loading