Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,664 changes: 1,664 additions & 0 deletions 0001-Add-MPX-trace-log-harvesting-and-on-demand-support.patch

Large diffs are not rendered by default.

106 changes: 106 additions & 0 deletions config/mp_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
[
{
"Name": "IOD0_MPART",
"Index": 0
},
{
"Name": "IOD0_MPASP",
"Index": 1
},
{
"Name": "IOD0_MP1",
"Index": 2
},
{
"Name": "IOD0_MPIO",
"Index": 3
},
{
"Name": "IOD0_MPRAS",
"Index": 4
},
{
"Name": "IOD0_MPM",
"Index": 5
},
{
"Name": "IOD0_MPDACC0",
"Index": 6
},
{
"Name": "IOD0_MPDACC1",
"Index": 7
},
{
"Name": "IOD0_MPDACC2",
"Index": 8
},
{
"Name": "IOD0_CCD0_MP5",
"Index": 9
},
{
"Name": "IOD0_CCD1_MP5",
"Index": 10
},
{
"Name": "IOD0_CCD2_MP5",
"Index": 11
},
{
"Name": "IOD0_CCD3_MP5",
"Index": 12
},
{
"Name": "IOD1_MPART",
"Index": 13
},
{
"Name": "IOD1_MPASP",
"Index": 14
},
{
"Name": "IOD1_MP1",
"Index": 15
},
{
"Name": "IOD1_MPIO",
"Index": 16
},
{
"Name": "IOD1_MPRAS",
"Index": 17
},
{
"Name": "IOD1_MPM",
"Index": 18
},
{
"Name": "IOD1_MPDACC0",
"Index": 19
},
{
"Name": "IOD1_MPDACC1",
"Index": 20
},
{
"Name": "IOD1_MPDACC2",
"Index": 21
},
{
"Name": "IOD1_CCD0_MP5",
"Index": 22
},
{
"Name": "IOD1_CCD1_MP5",
"Index": 23
},
{
"Name": "IOD1_CCD2_MP5",
"Index": 24
},
{
"Name": "IOD1_CCD3_MP5",
"Index": 25
}
]
18 changes: 18 additions & 0 deletions config/ras_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,24 @@
"Description": "Error threshold count for PCIE AER errors",
"Value": 1
}
},
{
"Soc0MPList": {
"Description": "List of Socket 0 MP's where tracelogs are enabled",
"Value": [
"IOD0_MPART",
"IOD0_MPASP"
]
}
},
{
"Soc1MPList": {
"Description": "List of Socket 1 MP's where tracelogs are enabled",
"Value": [
"IOD0_MPART",
"IOD0_MPASP"
]
}
}
]
}
17 changes: 17 additions & 0 deletions include/apml_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class Manager : public amd::ras::Manager
uint8_t progId;
size_t contextType;
uint64_t recordId;
uint16_t fatalSectionCount;
size_t watchdogTimerCounter;
boost::asio::io_context& io;
std::vector<uint8_t> blockId;
Expand Down Expand Up @@ -361,6 +362,22 @@ class Manager : public amd::ras::Manager
*/
void getLastTransAddr(const std::shared_ptr<FatalCperRecord>&, uint8_t);

/** @brief Harvests MPX trace logs.
 *
 * @details This function collects MPX trace logs from the system
 * when a fatal error occurs.
 *
 * The parameters are unnamed in this declaration; the names below follow
 * positional order.
 *
 * @param[in] fatalPtr - Shared pointer to a FatalCperRecord.
 * @param[in] socNum - The SoC number for which the trace logs are
 * harvested.
 * @param[out] mpList - List of MPs for which trace data is collected
 * (passed by non-const reference).
 * @param[in] baseSectionCount - The base section count used for CPER offset
 * calculation.
 *
 * NOTE(review): baseSectionCount is declared as uint8_t while the
 * fatalSectionCount member of this class is uint16_t - confirm that the
 * values passed here can never exceed 255.
 */

void harvestMpxTraceLog(const std::shared_ptr<FatalCperRecord>&, uint8_t,
std::vector<std::string>&, uint8_t);

/** @brief Harvests debug log ID dump data
*
* @details This function harvests the debug log ID data for the list
Expand Down
3 changes: 3 additions & 0 deletions include/base_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "config_manager.hpp"
#include "oem_cper.hpp"

#include <map>

constexpr size_t socket1 = 1;
constexpr size_t socket2 = 2;

Expand Down Expand Up @@ -71,6 +73,7 @@ class Manager
std::shared_ptr<McaRuntimeCperRecord> mcaPtr;
std::shared_ptr<McaRuntimeCperRecord> dramPtr;
std::shared_ptr<PcieRuntimeCperRecord> pciePtr;
std::vector<std::pair<std::string, int>> mpToIndexMap;
std::string node;
std::vector<size_t> socIndex;

Expand Down
12 changes: 12 additions & 0 deletions include/config_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ namespace config
static constexpr auto service = "com.amd.RAS";
static constexpr auto objectPath = "/com/amd/RAS";

constexpr size_t maxErrorIndex = 256;
constexpr auto errorCountFile = "/var/lib/amd-bmc-ras/error_count.json";

extern std::array<uint64_t, maxErrorIndex> correctableCPUErrors;
extern std::array<uint64_t, maxErrorIndex> noncorrectableCPUErrors;
extern std::array<uint64_t, maxErrorIndex> correctableOtherErrors;
extern std::array<uint64_t, maxErrorIndex> noncorrectableOtherErrors;

using ConfigIface = sdbusplus::server::object_t<
sdbusplus::com::amd::RAS::server::Configuration,
sdbusplus::xyz::openbmc_project::Collection::server::DeleteAll>;
Expand Down Expand Up @@ -100,6 +108,10 @@ class Manager : public amd::ras::config::ConfigIface
*/
void deleteAll() override;

/** @brief Loads the error counters.
 *
 * NOTE(review): the definition is not visible from this header. Presumably
 * this restores the error count arrays declared at the top of this file
 * from errorCountFile - confirm against the implementation.
 */
void loadErrorCounts();

/** @brief Saves the error counters.
 *
 * NOTE(review): the definition is not visible from this header. Presumably
 * this persists the error count arrays declared at the top of this file
 * to errorCountFile - confirm against the implementation.
 */
void saveErrorCounts();

private:
sdbusplus::asio::object_server& objServer;
std::shared_ptr<sdbusplus::asio::connection>& systemBus;
Expand Down
15 changes: 15 additions & 0 deletions include/oem_cper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <fstream>

extern "C"
{
#include "libcper/Cper.h"
Expand All @@ -12,6 +14,7 @@ constexpr size_t length8 = 8;
constexpr size_t length32 = 32;
constexpr size_t length96 = 96;
constexpr size_t length91 = 91;
constexpr size_t traceBufferDataLen = 2048;

struct CrashdumpData
{
Expand Down Expand Up @@ -51,6 +54,13 @@ struct AmdFatalErrorData

using EFI_AMD_FATAL_ERROR_DATA = AmdFatalErrorData;

/** @brief Fixed-size buffer holding the trace log data of one MP.
 *
 * Contains traceBufferDataLen (2048) 32-bit words, i.e. 8192 bytes of data.
 * The struct is declared packed. FatalCperRecord refers to instances of
 * this type through its TraceBufferRecord pointer.
 */
struct AmdMpTraceBufferData
{
// Trace log words; capacity is fixed at traceBufferDataLen entries.
uint32_t TracelogData[traceBufferDataLen];
} __attribute__((packed));

using EFI_AMD_MP_TRACELOG_DATA = AmdMpTraceBufferData;

struct RuntimeErrorInfo
{
EFI_IA32_X64_PROCESSOR_ERROR_RECORD ProcError;
Expand All @@ -66,6 +76,11 @@ struct FatalCperRecord
EFI_COMMON_ERROR_RECORD_HEADER Header;
EFI_ERROR_SECTION_DESCRIPTOR* SectionDescriptor;
EFI_AMD_FATAL_ERROR_DATA* ErrorRecord;
EFI_AMD_MP_TRACELOG_DATA* TraceBufferRecord;
FatalCperRecord() :
SectionDescriptor(nullptr), ErrorRecord(nullptr),
TraceBufferRecord(nullptr)
{}
} __attribute__((packed));

struct McaRuntimeCperRecord
Expand Down
90 changes: 90 additions & 0 deletions include/tbai_manager.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#pragma once

#include "crashdump_manager.hpp"
#include "oem_cper.hpp"

#include <com/amd/Dump/Create/server.hpp>
#include <sdbusplus/asio/object_server.hpp>
#include <sdbusplus/server.hpp>
#include <xyz/openbmc_project/Dump/Create/server.hpp>

namespace amd
{
namespace ras
{
namespace tbai
{
static constexpr auto service = "com.amd.RAS";
static constexpr auto objectPath = "/com/amd/TBAI";

using createDumpIface = sdbusplus::server::object_t<
sdbusplus::xyz::openbmc_project::Dump::server::Create,
sdbusplus::com::amd::Dump::server::Create>;

using DumpCreateParams =
std::map<std::string, std::variant<std::string, uint64_t>>;

/**
 * @brief Manager class implementing the dump-creation D-Bus interfaces
 *        for TBAI.
 *
 * @details The class derives (virtually) from createDumpIface, which
 *          combines the xyz::openbmc_project::Dump::server::Create and
 *          com::amd::Dump::server::Create bindings, and overrides
 *          createDump(). It also declares a private harvestMpxTraceLog()
 *          helper and holds the state listed under `private:`.
 *
 *          NOTE(review): the member definitions are not in this header;
 *          how createDump() uses harvestMpxTraceLog() and the members
 *          below must be confirmed against the implementation file.
 */
class Manager : virtual public createDumpIface
{
public:
// The manager is neither default-constructible, copyable nor movable.
Manager() = delete;
Manager(const Manager&) = delete;
Manager& operator=(const Manager&) = delete;
Manager(Manager&&) = delete;
Manager& operator=(Manager&&) = delete;
~Manager() = default;

/** @brief Constructs the TBAI manager.
 *
 * The parameters are unnamed in this declaration; they are listed here
 * in positional order.
 *
 * @param[in] (1st) - Reference to the sdbusplus asio object server.
 * @param[in] (2nd) - Reference to the shared pointer holding the
 *                    sdbusplus asio connection.
 * @param[in] (3rd) - Boost ASIO I/O context.
 * @param[in] (4th) - std::string passed by non-const reference.
 *                    NOTE(review): presumably the host node identifier
 *                    stored in `node` - confirm.
 */
Manager(sdbusplus::asio::object_server&,
std::shared_ptr<sdbusplus::asio::connection>&,
boost::asio::io_context&, std::string&);

/** @brief Implementation for CreateDump
 * Method to create Dump.
 *
 * @param[in] params - Dump creation parameters: a map from string keys
 *                     to values that are either a string or a uint64_t
 *                     (see DumpCreateParams). Taken by value.
 *
 * @return object_path - The object path of the new entry.
 */
sdbusplus::message::object_path
createDump(DumpCreateParams params) override;

private:
// The three members below are references, not copies: the referenced
// objects (including the shared_ptr object itself for systemBus) must
// outlive this Manager.
sdbusplus::asio::object_server& objServer;
std::shared_ptr<sdbusplus::asio::connection>& systemBus;
boost::asio::io_context& io;

// NOTE(review): presumably the host node identifier and the name of the
// MP currently being processed - confirm against the implementation.
std::string node;
std::string mpName;

// NOTE(review): presumably serializes dump requests - confirm which
// members it protects.
std::mutex tbaiMutex;

// Despite the "Map" name this is a vector of (string, int) pairs.
// NOTE(review): presumably (MP name, MP index) - confirm.
std::vector<std::pair<std::string, int>> mpToIndexMap;

// No in-class initializers: these hold indeterminate values unless the
// constructor (not visible here) initializes them.
size_t currentSectionCount;
size_t inputSocNum;
size_t cpuCount;

std::vector<size_t> socIndex;

// Record object shared with the harvesting code; uses the same
// FatalCperRecord type as the fatal-error path.
std::shared_ptr<FatalCperRecord> mpxPtr;

// Owning map from an int key to CrashdumpInterface instances.
// NOTE(review): meaning of the key is not visible here - confirm.
std::map<int, std::unique_ptr<CrashdumpInterface>> tbaiRecordMgr;

/** @brief Harvests MPX trace logs for a dump request.
 *
 * @param[in] params - Dump creation parameters (same type as accepted
 *                     by createDump()). Taken by value.
 *
 * NOTE(review): presumably invoked from createDump() - confirm.
 */
void harvestMpxTraceLog(DumpCreateParams params);
};

} // namespace tbai
} // namespace ras
} // namespace amd
5 changes: 4 additions & 1 deletion include/utils/cper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ static constexpr std::string_view runtimeMcaErr = "RUNTIME_MCA_ERROR";
static constexpr std::string_view runtimePcieErr = "RUNTIME_PCIE_ERROR";
static constexpr std::string_view runtimeDramErr = "RUNTIME_DRAM_ERROR";
static constexpr std::string_view fatalErr = "FATAL";
static constexpr std::string_view mpxTracelog = "MPX_TRACE_LOG";

namespace amd
{
Expand All @@ -25,6 +26,8 @@ constexpr uint8_t addcGenNumber3 = 0x03;
constexpr uint8_t familyId1ah = 0x1A;
constexpr uint16_t pcieVendorId = 0x1022;
constexpr uint8_t minorRevision = 0xB;
constexpr size_t maxByte = 0xFF;
constexpr size_t quadBit = 4;

/** @brief Finds a filename in the RAS directory that matches a given pattern.
*
Expand Down Expand Up @@ -194,7 +197,7 @@ void dumpContext(const std::shared_ptr<FatalCperRecord>&, uint16_t numbanks,
*/
template <typename T>
void createFile(const std::shared_ptr<T>&, const std::string_view&, uint16_t,
size_t&, const std::string&);
size_t&, const std::string&, const std::string& tbaiFileName = "");

/** @brief Checks if the signature ID matches the configuration list.
*
Expand Down
4 changes: 4 additions & 0 deletions include/utils/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ ReturnType getProperty(sdbusplus::bus::bus&, const char*, const char*,
*/
bool checkObjPath(std::string);

/** @brief Retrieves CPU count information.
 *
 * All three parameters are unnamed non-const references.
 *
 * NOTE(review): the definition is not visible here. Presumably the
 * arguments are, in order, the CPU count, the host node string and the
 * list of socket indexes - confirm against the implementation which of
 * them are inputs and which are outputs.
 */
void getCpuCount(size_t&, std::string&, std::vector<size_t>&);

/** @brief Provides MP trace log information as (string, int) pairs.
 *
 * The vector is passed by non-const reference.
 *
 * NOTE(review): the definition is not visible here. Presumably the vector
 * is filled with (MP name, MP index) pairs - confirm against the
 * implementation and its data source.
 */
void mpTraceLogInfo(std::vector<std::pair<std::string, int>>&);

} // namespace util
} // namespace ras
} // namespace amd
Loading