Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial refactoring of AIE trace plugins #7813

Merged
merged 17 commits into from
Dec 7, 2023
2,280 changes: 2,280 additions & 0 deletions src/runtime_src/xdp/doxygen.config

Large diffs are not rendered by default.

453 changes: 282 additions & 171 deletions src/runtime_src/xdp/profile/database/static_info/aie_util.cpp

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions src/runtime_src/xdp/profile/database/static_info/aie_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,38 @@ namespace xdp::aie {
xdp::aie::driver_config
getDriverConfig(const boost::property_tree::ptree& aie_meta,
const std::string& root);

XDP_CORE_EXPORT
uint16_t
getAIETileRowOffset(const boost::property_tree::ptree& aie_meta,
const std::string& location);

XDP_CORE_EXPORT
std::vector<std::string>
getValidGraphs(const boost::property_tree::ptree& aie_meta,
const std::string& root);

XDP_CORE_EXPORT
bool isInfoVerbosity();

XDP_CORE_EXPORT
bool isDebugVerbosity();

XDP_CORE_EXPORT
bool isInputSet(const module_type type, const std::string metricSet);

XDP_CORE_EXPORT
uint16_t getRelativeRow(uint16_t absRow, uint16_t rowOffset);

XDP_CORE_EXPORT
module_type getModuleType(uint16_t absRow, uint16_t rowOffset);

XDP_CORE_EXPORT
uint32_t bcIdToEvent(int bcId);

XDP_CORE_EXPORT
std::string getModuleName(module_type mod);

} // namespace xdp::aie

#endif
3 changes: 0 additions & 3 deletions src/runtime_src/xdp/profile/device/device_trace_logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,6 @@ namespace xdp {
// start event, end event
// start end must have created already
// check if the memory ports on current cu has any event

uint64_t cuLastTimestamp = amLastTrans[amIndex];

// get CU Id for the current slot
Expand All @@ -434,7 +433,6 @@ namespace xdp {
// To reduce overhead, first check the timestamp.
// If last activity timestamp on CU is earlier than current AIM, then only check
// whether the current AIM is attached to the same CU.
//
if(cuLastTimestamp >= aimLastTrans[aimIndex]) {
continue;
}
Expand All @@ -452,7 +450,6 @@ namespace xdp {
// To reduce overhead, first check the timestamp.
// If last activity timestamp on CU is earlier than current ASM, then only check
// whether the current ASM is attached to the same CU.
///
if(cuLastTimestamp >= asmLastTrans[asmIndex]) {
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,10 @@ namespace xdp {
XAIE_EVENT_INSTR_CASCADE_PUT_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE}},
{"read_throughputs", {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_INSTR_STREAM_GET_CORE,
XAIE_EVENT_INSTR_CASCADE_GET_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE}},
{"aie_trace", {XAIE_EVENT_PORT_RUNNING_1_CORE, XAIE_EVENT_PORT_STALLED_1_CORE,
XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}},
{"s2mm_throughputs", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}},
{"mm2s_throughputs", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}},
{"aie_trace", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE,
XAIE_EVENT_PORT_RUNNING_1_CORE, XAIE_EVENT_PORT_STALLED_1_CORE}},
{"events", {XAIE_EVENT_INSTR_EVENT_0_CORE, XAIE_EVENT_INSTR_EVENT_1_CORE,
XAIE_EVENT_USER_EVENT_0_CORE, XAIE_EVENT_USER_EVENT_1_CORE}}
};
Expand All @@ -104,8 +106,6 @@ namespace xdp {
else {
mCoreStartEvents["floating_point"] = {XAIE_EVENT_FP_HUGE_CORE, XAIE_EVENT_INT_FP_0_CORE,
XAIE_EVENT_FP_INVALID_CORE, XAIE_EVENT_FP_INF_CORE};
mCoreStartEvents["s2mm_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE};
mCoreStartEvents["mm2s_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE};
}
mCoreEndEvents = mCoreStartEvents;

Expand Down Expand Up @@ -179,34 +179,32 @@ namespace xdp {
{"mem_trace", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE,
XAIE_EVENT_PORT_STALLED_0_MEM_TILE,
XAIE_EVENT_PORT_IDLE_0_MEM_TILE,
XAIE_EVENT_PORT_TLAST_0_MEM_TILE}}
XAIE_EVENT_PORT_TLAST_0_MEM_TILE}},
{"input_throughputs", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE,
XAIE_EVENT_DMA_S2MM_SEL0_STREAM_STARVATION_MEM_TILE,
XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE,
XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}},
{"output_throughputs", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE,
XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE,
XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE,
XAIE_EVENT_DMA_MM2S_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}},
{"conflict_stats1", {XAIE_EVENT_CONFLICT_DM_BANK_0_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_1_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_2_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_3_MEM_TILE}},
{"conflict_stats2", {XAIE_EVENT_CONFLICT_DM_BANK_4_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_5_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_6_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_7_MEM_TILE}},
{"conflict_stats3", {XAIE_EVENT_CONFLICT_DM_BANK_8_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_9_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_10_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_11_MEM_TILE}},
{"conflict_stats4", {XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE}}
};
if (metadata->getHardwareGen() > 1) {
mMemTileStartEvents["input_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE,
XAIE_EVENT_DMA_S2MM_SEL0_STREAM_STARVATION_MEM_TILE,
XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE,
XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE};
mMemTileStartEvents["output_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE,
XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE,
XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE,
XAIE_EVENT_DMA_MM2S_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE};
mMemTileStartEvents["conflict_stats1"] = {XAIE_EVENT_CONFLICT_DM_BANK_0_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_1_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_2_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_3_MEM_TILE};
mMemTileStartEvents["conflict_stats2"] = {XAIE_EVENT_CONFLICT_DM_BANK_4_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_5_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_6_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_7_MEM_TILE};
mMemTileStartEvents["conflict_stats3"] = {XAIE_EVENT_CONFLICT_DM_BANK_8_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_9_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_10_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_11_MEM_TILE};
mMemTileStartEvents["conflict_stats4"] = {XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE,
XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE};
}
mMemTileStartEvents["s2mm_channels"] = mMemTileStartEvents["input_channels"];
mMemTileStartEvents["s2mm_channels_details"] = mMemTileStartEvents["input_channels_details"];
mMemTileStartEvents["s2mm_throughputs"] = mMemTileStartEvents["input_throughputs"];
Expand Down Expand Up @@ -391,6 +389,7 @@ namespace xdp {

bool newPort = false;
auto portnum = getPortNumberFromEvent(startEvent);
uint8_t channel = (portnum == 0) ? channel0 : channel1;

// New port needed: reserve, configure, and store
if (switchPortMap.find(portnum) == switchPortMap.end()) {
Expand All @@ -401,10 +400,32 @@ namespace xdp {
switchPortMap[portnum] = switchPortRsc;

if (type == module_type::core) {
// AIE Tiles (e.g., trace streams)
// Define stream switch port to monitor core or memory trace
uint8_t traceSelect = (startEvent == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1;
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect);
int channelNum = 0;
std::string portName;

// AIE Tiles
if (metricSet.find("trace") != std::string::npos) {
// Monitor memory or core trace (memory:1, core:0)
uint8_t traceSelect = (startEvent == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 1 : 0;
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect);

channelNum = traceSelect;
portName = (traceSelect == 0) ? "core trace" : "memory trace";
}
else {
auto slaveOrMaster = aie::isInputSet(type, metricSet) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel);

channelNum = channel;
portName = aie::isInputSet(type, metricSet) ? "DMA MM2S" : "DMA S2MM";
}

if (aie::isDebugVerbosity()) {
std::stringstream msg;
msg << "Configured core module stream switch to monitor " << portName
<< " for metric set " << metricSet << " and channel " << channelNum;
xrt_core::message::send(severity_level::debug, "XRT", msg.str());
}
}
else if (type == module_type::shim) {
// Interface tiles (e.g., PLIO, GMIO)
Expand All @@ -419,7 +440,6 @@ namespace xdp {
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0);
}
else {
uint8_t channel = (portnum == 0) ? channel0 : channel1;
auto slaveOrMaster = isInputSet(type, metricSet) ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE;
switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ file(GLOB AIE_TRACE_PLUGIN_FILES
"${IMPL_DIR}/*.h"
"${IMPL_DIR}/*.cpp"
)
file(GLOB AIE_TRACE_UTIL_FILES
"${PROFILE_DIR}/plugin/aie_trace/util/*.h"
"${PROFILE_DIR}/plugin/aie_trace/util/*.cpp"
)

if (XDP_MINIMAL_BUILD STREQUAL "yes")
add_library(xdp_aie_trace_plugin MODULE ${AIE_TRACE_PLUGIN_FILES})
Expand Down Expand Up @@ -63,7 +67,7 @@ elseif (${XRT_NATIVE_BUILD} STREQUAL "yes")

elseif (DEFINED XRT_AIE_BUILD AND ${XRT_NATIVE_BUILD} STREQUAL "no")

add_library(xdp_aie_trace_plugin MODULE ${AIE_TRACE_PLUGIN_FILES})
add_library(xdp_aie_trace_plugin MODULE ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_UTIL_FILES})

add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil)
target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine)
Expand Down
90 changes: 59 additions & 31 deletions src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,38 +22,66 @@
#include <memory>

namespace xdp {

class VPDatabase;

class VPDatabase;

// AIE trace configurations can be done in different ways depending
// on the platform. For example, platforms like the VCK5000 or
// discovery platform, where the host code runs on the x86 and the AIE
// is not directly accessible, will require configuration be done via
// PS kernel.
// Abstract base class for AIE trace implementations. It stores the database
// pointer and shared metadata handed to the constructor and declares the
// pure-virtual operations that each concrete implementation must provide.
class AieTraceImpl {

protected:
// Non-owning pointer to the profile database; nullptr until set by the constructor
VPDatabase *db = nullptr;
// Shared handle to the AIE trace metadata passed in at construction
std::shared_ptr<AieTraceMetadata> metadata;

public:
// Stores the given database pointer and metadata handle; no other work is done here
AieTraceImpl(VPDatabase *database, std::shared_ptr<AieTraceMetadata> metadata)
: db(database), metadata(metadata) {}

// Default construction is disabled: a database and metadata must be supplied
AieTraceImpl() = delete;
// Virtual destructor so derived implementations can be destroyed through a base pointer
virtual ~AieTraceImpl(){};

// Pure-virtual interface; behavior is defined entirely by the derived classes
virtual void updateDevice() = 0;
virtual void freeResources() = 0;
// NOTE(review): index/handle presumably identify the device being polled — confirm against implementations
virtual void pollTimers(uint64_t index, void *handle) = 0;
// NOTE(review): presumably returns the buffer size to actually use for the requested size — confirm
virtual uint64_t checkTraceBufSize(uint64_t size) = 0;
/*
* If trace module is running, it might buffer partial trace.
* This leftover trace needs to be force flushed at the end using a custom end
* event. This applies to trace windowing on AIE1 and all scenarios on AIE2.
*/
virtual void flushAieTileTraceModule() = 0;
};
/**
* @brief Base class for AI Engine trace implementations
* @details Trace configurations can be done in different ways depending
* on the platform. For example, platforms like the VCK5000 or
* discovery platform, where the host code runs on the x86 and the
* AIE is not directly accessible, will require configuration be
* done via PS kernel.
*
* This class only stores the database pointer and metadata handle;
* every operation is pure virtual and supplied by a derived class.
*/
class AieTraceImpl {
public:
/**
* @brief AIE Trace implementation constructor
* @param database Profile database for storing results and configuration
* @param metadata Design-specific AIE metadata typically taken from xclbin
*/
AieTraceImpl(VPDatabase* database, std::shared_ptr<AieTraceMetadata> metadata)
:db(database), metadata(metadata) {}

/// @brief Default construction is disabled; a database and metadata are required
AieTraceImpl() = delete;
/// @brief AIE Trace implementation destructor (virtual for deletion via base pointer)
virtual ~AieTraceImpl() {};

protected:
/// @brief Database for configuration and results (non-owning; nullptr until constructed)
VPDatabase* db = nullptr;

/// @brief Trace metadata parsed from user settings
std::shared_ptr<AieTraceMetadata> metadata;

public:
/// @brief Update device (e.g., after loading xclbin)
virtual void updateDevice() = 0;

/// @brief Stop and release resources (e.g., counters, ports)
virtual void freeResources() = 0;

/**
* @brief Poll AIE timers (for system timeline only)
* @param index Device index number
* @param handle Pointer to device handle
*/
virtual void pollTimers(uint64_t index, void* handle) = 0;

/**
* @brief Verify correctness of trace buffer size
* @param size Requested size of trace buffer
* @return Trace buffer size; presumably the size that should actually be
* used after verification (TODO confirm against implementations)
*/
virtual uint64_t checkTraceBufSize(uint64_t size) = 0;

/**
* @brief Flush trace modules by forcing end events
* @details Trace modules buffer partial packets. At end of run, these need to be
* flushed using a custom end event. This applies to trace windowing and
* passive tiles like memory and interface.
*/
virtual void flushTraceModules() = 0;
};

} // namespace xdp

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ void AieTracePluginUnified::flushAIEDevice(void *handle) {
return;

// Flush AIE then datamovers
AIEData.implementation->flushAieTileTraceModule();
AIEData.implementation->flushTraceModules();
flushOffloader(AIEData.offloader, false);
}

Expand Down Expand Up @@ -433,7 +433,7 @@ void AieTracePluginUnified::finishFlushAIEDevice(void *handle) {
endPollforDevice(handle);

// Flush AIE then datamovers
AIEData.implementation->flushAieTileTraceModule();
AIEData.implementation->flushTraceModules();
flushOffloader(AIEData.offloader, true);
XDPPlugin::endWrite();
(db->getStaticInfo()).deleteCurrentlyUsedDeviceInterface(AIEData.deviceID);
Expand All @@ -453,7 +453,7 @@ void AieTracePluginUnified::writeAll(bool openNewFiles) {
auto &AIEData = kv.second;

if (AIEData.valid) {
AIEData.implementation->flushAieTileTraceModule();
AIEData.implementation->flushTraceModules();
flushOffloader(AIEData.offloader, true);
}
}
Expand Down
Loading