Skip to content

Commit 275bbcb

Browse files
authored
xrt-smi rearchitecture Patch 4 (#8714)
* xrt-smi rearchitecture Signed-off-by: Akshay Tondak <[email protected]> * Fix for windows build failure Signed-off-by: Akshay Tondak <[email protected]> * End of file new line handling Signed-off-by: Akshay Tondak <[email protected]> * Move all configuration generation to a common file Signed-off-by: Akshay Tondak <[email protected]> * Adding missing files Signed-off-by: Akshay Tondak <[email protected]> * Windows build failure fix Signed-off-by: Akshay Tondak <[email protected]> * Windows build failure fix and converting to snake case Signed-off-by: Akshay Tondak <[email protected]> * Adding missing XRT_CORE_COMMON_SOURCE define Signed-off-by: Akshay Tondak <[email protected]> * Some review comments handled and pipeline re-trigger Signed-off-by: Akshay Tondak <[email protected]> * Moving re-usable code to base class from which each shim can derive Signed-off-by: Akshay Tondak <[email protected]> * Fixing edge build error Signed-off-by: Akshay Tondak <[email protected]> * Removing some extra includes Signed-off-by: Akshay Tondak <[email protected]> * Formatting review comment changes Signed-off-by: Akshay Tondak <[email protected]> * Exporting required APIs for MCDM Signed-off-by: Akshay Tondak <[email protected]> * Adding std::move for efficient object creation Signed-off-by: Akshay Tondak <[email protected]> --------- Signed-off-by: Akshay Tondak <[email protected]>
1 parent 337b125 commit 275bbcb

24 files changed

+592
-851
lines changed

src/runtime_src/core/common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ add_library(core_common_library_objects OBJECT
4343
sysinfo.cpp
4444
xclbin_parser.cpp
4545
xclbin_swemu.cpp
46+
smi.cpp
4647
)
4748

4849
target_include_directories(core_common_library_objects

src/runtime_src/core/common/smi.cpp

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
3+
4+
#define XRT_CORE_COMMON_SOURCE
5+
6+
// Local - Include Files
7+
#include "smi.h"
8+
9+
// 3rd Party Library - Include Files
10+
#include <boost/property_tree/json_parser.hpp>
11+
#include <boost/property_tree/ptree.hpp>
12+
13+
#include <string>
14+
#include <vector>
15+
16+
namespace xrt_core::smi {
17+
18+
using boost::property_tree::ptree;
19+
20+
// Serialize this option into a property tree node.
//
// The scalar fields (name, description, type, alias, default_value and
// value_type) are always emitted. A "description_array" child holding one
// unnamed node per entry is appended only when the option carries
// sub-descriptions.
ptree
option::
to_ptree() const
{
  ptree node;
  node.put("name", name);
  node.put("description", description);
  node.put("type", type);
  node.put("alias", alias);
  node.put("default_value", default_value);
  node.put("value_type", value_type);

  // Nothing more to emit for options without sub-descriptions
  if (description_array.empty())
    return node;

  ptree entries;
  for (const auto& entry : description_array) {
    ptree entry_node;
    entry_node.put("name", entry.name);
    entry_node.put("description", entry.description);
    entry_node.put("type", entry.type);
    // Children pushed with an empty key serialize as JSON array elements
    entries.push_back(std::make_pair("", entry_node));
  }
  node.add_child("description_array", entries);

  return node;
}
44+
45+
// Return the table of validate tests advertised by xrt-smi.
//
// Each entry is a (name, description, type) tuple. The table is built once
// as a function-local static and handed out by const reference. The method
// is virtual so a derived class can supply its own table.
// NOTE(review): the "common"/"hidden" type presumably controls whether the
// test is shown in help output -- confirm against the consumer of this JSON.
const tuple_vector&
smi_base::
get_validate_test_desc() const
{
  static const tuple_vector validate_test_desc = {
    {"aie-reconfig-overhead", "Run end-to-end array reconfiguration overhead through shim DMA", "hidden"},
    {"all", "All applicable validate tests will be executed (default)", "common"},
    {"cmd-chain-latency", "Run end-to-end latency test using command chaining", "common"},
    {"cmd-chain-throughput", "Run end-to-end throughput test using command chaining", "common"},
    {"df-bw", "Run bandwidth test on data fabric", "common"},
    {"gemm", "Measure the TOPS value of GEMM operations", "common"},
    {"latency", "Run end-to-end latency test", "common"},
    {"quick", "Run a subset of four tests: \n1. latency \n2. throughput \n3. cmd-chain-latency \n4. cmd-chain-throughput", "common"},
    {"spatial-sharing-overhead", "Run Spatial Sharing Overhead Test", "hidden"},
    {"tct-all-col", "Measure average TCT processing time for all columns", "common"},
    {"tct-one-col", "Measure average TCT processing time for one column", "common"},
    {"temporal-sharing-overhead", "Run Temporal Sharing Overhead Test", "hidden"},
    {"throughput", "Run end-to-end throughput test", "common"},
    {"aux-connection", "Check if auxiliary power is connected", "common"},
    {"dma", "Run dma test", "common"},
    // Was misspelled "thostmem-bw", which does not match the test's real name
    {"hostmem-bw", "Run 'bandwidth kernel' when host memory is enabled", "common"},
    {"m2m", "Run M2M test", "common"},
    {"mem-bw", "Run 'bandwidth kernel' and check the throughput", "common"},
    {"p2p", "Run P2P test", "common"},
    {"pcie-link", "Check if PCIE link is active", "common"},
    {"sc-version", "Check if SC firmware is up-to-date", "common"},
    {"verify", "Run 'Hello World' kernel test", "common"}
  };
  return validate_test_desc;
}
75+
76+
// Return the table of examine reports advertised by xrt-smi.
//
// Each entry is a (name, description, type) tuple. The table is built once
// as a function-local static and handed out by const reference. The method
// is virtual so a derived class can supply its own table.
const tuple_vector&
smi_base::
get_examine_report_desc() const
{
  static const tuple_vector examine_report_desc = {
    {"aie-partitions", "AIE partition information", "common"},
    {"host", "Host information", "common"},
    {"platform", "Platforms flashed on the device", "common"},
    {"telemetry", "Telemetry data for the device", "common"},
    {"aie", "AIE metadata in xclbin", "common"},
    {"aiemem", "AIE memory tile information", "common"},
    {"aieshim", "AIE shim tile status", "common"},
    {"debug-ip-status", "Status of Debug IPs present in xclbin loaded on device", "common"},
    {"dynamic-regions", "Information about the xclbin and the compute units", "common"},
    {"electrical", "Electrical and power sensors present on the device", "common"},
    // Spelling corrected from "Asyncronus" in this user-facing help text
    {"error", "Asynchronous Error present on the device", "common"},
    {"firewall", "Firewall status", "common"},
    {"mailbox", "Mailbox metrics of the device", "common"},
    {"mechanical", "Mechanical sensors on and surrounding the device", "common"},
    {"memory", "Memory information present on the device", "common"},
    {"pcie-info", "Pcie information of the device", "common"},
    {"qspi-status", "QSPI write protection status", "common"},
    {"thermal", "Thermal sensors present on the device", "common"}
  };
  return examine_report_desc;
}
102+
103+
std::vector<basic_option>
104+
smi_base::
105+
construct_run_option_description() const
106+
{
107+
std::vector<basic_option> run_option_descriptions;
108+
for (const auto& [name, description, type] : get_validate_test_desc()) {
109+
run_option_descriptions.push_back({name, description, type});
110+
}
111+
return run_option_descriptions;
112+
}
113+
114+
std::vector<basic_option>
115+
smi_base::
116+
construct_report_option_description() const
117+
{
118+
std::vector<basic_option> report_option_descriptions;
119+
for (const auto& [name, description, type] : get_examine_report_desc()) {
120+
report_option_descriptions.push_back({name, description, type});
121+
}
122+
return report_option_descriptions;
123+
}
124+
125+
// Build the property tree describing the "validate" sub-command: its name,
// description and type, followed by an "options" array with one node per
// supported option. The "run" option carries the list of validate tests.
ptree
smi_base::
construct_validate_subcommand() const
{
  // Constructor argument order: name, alias, description, type, default value, value type [, sub-descriptions]
  const std::vector<option> validate_options = {
    {"device", "d", "The Bus:Device.Function (e.g., 0000:d8:00.0) device of interest", "common", "", "string"},
    {"format", "f", "Report output format. Valid values are:\n"
                    "\tJSON - Latest JSON schema\n"
                    "\tJSON-2020.2 - JSON 2020.2 schema", "common", "JSON", "string"},
    {"output", "o", "Direct the output to the given file", "common", "", "string"},
    {"help", "h", "Help to use this sub-command", "common", "", "none"},
    {"run", "r", "Run a subset of the test suite. Valid options are:\n", "common", "", "array", construct_run_option_description()},
    {"path", "p", "Path to the directory containing validate xclbins", "hidden", "", "string"},
    {"param", "", "Extended parameter for a given test. Format: <test-name>:<key>:<value>", "hidden", "", "string"},
    {"pmode", "", "Specify which power mode to run the benchmarks in. Note: Some tests might be unavailable for some modes", "hidden", "", "string"}
  };

  // Unnamed children serialize as elements of a JSON array
  ptree option_nodes;
  for (const auto& opt : validate_options)
    option_nodes.push_back(std::make_pair("", opt.to_ptree()));

  ptree subcommand;
  subcommand.put("name", "validate");
  subcommand.put("description", "Validates the given device by executing the platform's validate executable.");
  subcommand.put("type", "common");
  subcommand.add_child("options", option_nodes);
  return subcommand;
}
155+
156+
// Build the property tree describing the "examine" sub-command: its name,
// type and description, followed by an "options" array with one node per
// supported option. The "report" option carries the list of reports.
ptree
smi_base::
construct_examine_subcommand() const
{
  // Constructor argument order: name, alias, description, type, default value, value type [, sub-descriptions]
  const std::vector<option> examine_options = {
    {"device", "d", "The Bus:Device.Function (e.g., 0000:d8:00.0) device of interest", "common", "", "string"},
    {"format", "f", "Report output format. Valid values are:\n"
                    "\tJSON - Latest JSON schema\n"
                    "\tJSON-2020.2 - JSON 2020.2 schema", "common", "", "string"},
    {"output", "o", "Direct the output to the given file", "common", "", "string"},
    {"help", "h", "Help to use this sub-command", "common", "", "none"},
    {"report", "r", "The type of report to be produced. Reports currently available are:\n", "common", "", "array", construct_report_option_description()},
    {"element", "e", "Filters individual elements(s) from the report. Format: '/<key>/<key>/...'", "hidden", "", "array"}
  };

  // Unnamed children serialize as elements of a JSON array
  ptree option_nodes;
  for (const auto& opt : examine_options)
    option_nodes.push_back(std::make_pair("", opt.to_ptree()));

  ptree subcommand;
  subcommand.put("name", "examine");
  subcommand.put("type", "common");
  subcommand.put("description", "This command will 'examine' the state of the system/device and will generate a report of interest in a text or JSON format.");
  subcommand.add_child("options", option_nodes);
  return subcommand;
}
184+
185+
// Build the property tree describing the "configure" sub-command: its name,
// type and description, followed by an "options" array with one node per
// supported option.
ptree
smi_base::
construct_configure_subcommand() const
{
  // Constructor argument order: name, alias, description, type, default value, value type
  const std::vector<option> configure_options = {
    {"device", "d", "The Bus:Device.Function (e.g., 0000:d8:00.0) device of interest", "common", "", "string"},
    {"help", "h", "Help to use this sub-command", "common", "", "none"},
    {"daemon", "", "Update the device daemon configuration", "common", "", "none"},
    {"purge", "", "Remove the daemon configuration file", "hidden", "", "string"},
    {"host", "", "IP or hostname for device peer", "common", "", "string"},
    {"security", "", "Update the security level for the device", "hidden", "", "string"},
    {"clk_throttle", "", "Enable/disable the device clock throttling", "hidden", "", "string"},
    {"ct_threshold_power_override", "", "Update the power threshold in watts", "hidden", "", "string"},
    {"ct_threshold_temp_override", "", "Update the temperature threshold in celsius", "hidden", "", "string"},
    {"ct_reset", "", "Reset all throttling options", "hidden", "", "string"},
    {"showx", "", "Display the device configuration settings", "hidden", "", "string"}
  };

  // Unnamed children serialize as elements of a JSON array
  ptree option_nodes;
  for (const auto& opt : configure_options)
    option_nodes.push_back(std::make_pair("", opt.to_ptree()));

  ptree subcommand;
  subcommand.put("name", "configure");
  subcommand.put("type", "common");
  subcommand.put("description", "Device and host configuration");
  subcommand.add_child("options", option_nodes);
  return subcommand;
}
216+
217+
std::string
218+
smi_base::
219+
get_smi_config() const
220+
{
221+
ptree config;
222+
ptree subcommands;
223+
224+
subcommands.push_back(std::make_pair("", construct_validate_subcommand()));
225+
subcommands.push_back(std::make_pair("", construct_examine_subcommand()));
226+
subcommands.push_back(std::make_pair("", construct_configure_subcommand()));
227+
228+
config.add_child("subcommands", subcommands);
229+
230+
std::ostringstream oss;
231+
boost::property_tree::write_json(oss, config, true); // Pretty print with true
232+
return oss.str();
233+
}
234+
235+
std::string
236+
get_smi_config()
237+
{
238+
xrt_core::smi::smi_base instance;
239+
240+
return instance.get_smi_config();
241+
}
242+
} // namespace xrt_core::smi

src/runtime_src/core/common/smi.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
3+
4+
#pragma once
5+
// Local include files
6+
#include "config.h"
7+
8+
// 3rd Party Library - Include Files
9+
#include <boost/property_tree/ptree.hpp>
10+
11+
#include <string>
12+
#include <tuple>
13+
#include <vector>
14+
15+
namespace xrt_core::smi {
16+
17+
using tuple_vector = std::vector<std::tuple<std::string, std::string, std::string>>;
18+
19+
// Minimal description shared by every xrt-smi option and by the entries of
// an option's description array. Plain aggregate; all fields are strings.
struct basic_option {
  std::string name;        // identifier of the option / test / report
  std::string description; // human readable help text
  std::string type;        // category tag; the tables in smi.cpp use "common" and "hidden"
};
24+
25+
struct option : public basic_option {
26+
std::string alias;
27+
std::string default_value;
28+
std::string value_type;
29+
std::vector<basic_option> description_array;
30+
31+
option(const std::string name,
32+
const std::string alias,
33+
const std::string description,
34+
const std::string type,
35+
const std::string default_value,
36+
const std::string value_type,
37+
const std::vector<basic_option>& description_array = {})
38+
: basic_option{std::move(name), std::move(description), std::move(type)},
39+
alias(std::move(alias)),
40+
default_value(std::move(default_value)),
41+
value_type(std::move(value_type)),
42+
description_array(std::move(description_array)) {}
43+
44+
boost::property_tree::ptree to_ptree() const;
45+
};
46+
47+
// Each shim's smi class derives from this class
48+
// and adds its custom functionalities. Currently only validate tests and examine
49+
// reports differ between each shim but going forward, each shim can define its
50+
// custom behavior for xrt-smi as required. This also gives us the flexibility
51+
// to add device specific xrt-smi behavior.
52+
class smi_base {
53+
protected:
54+
55+
XRT_CORE_COMMON_EXPORT
56+
virtual const tuple_vector&
57+
get_validate_test_desc() const;
58+
59+
XRT_CORE_COMMON_EXPORT
60+
virtual const tuple_vector&
61+
get_examine_report_desc() const;
62+
63+
std::vector<basic_option>
64+
construct_run_option_description() const;
65+
66+
std::vector<basic_option>
67+
construct_report_option_description() const;
68+
69+
boost::property_tree::ptree
70+
construct_validate_subcommand() const;
71+
72+
boost::property_tree::ptree
73+
construct_examine_subcommand() const;
74+
75+
boost::property_tree::ptree
76+
construct_configure_subcommand() const;
77+
78+
public:
79+
XRT_CORE_COMMON_EXPORT
80+
std::string get_smi_config() const;
81+
};
82+
83+
XRT_CORE_COMMON_EXPORT
84+
std::string get_smi_config();
85+
86+
} // namespace xrt_core::smi

src/runtime_src/core/edge/user/device_linux.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "xrt.h"
66
#include "zynq_dev.h"
77
#include "aie_sys_parser.h"
8+
#include "smi.h"
89

910
#include "core/common/debug_ip.h"
1011
#include "core/common/query_requests.h"
@@ -678,6 +679,30 @@ struct am_counter
678679
}
679680
};
680681

682+
struct xrt_smi_config
683+
{
684+
using result_type = std::any;
685+
686+
static result_type
687+
get(const xrt_core::device* device, key_type key, const std::any& reqType)
688+
{
689+
if (key != key_type::xrt_smi_config)
690+
throw xrt_core::query::no_such_key(key, "Not implemented");
691+
692+
std::string xrt_smi_config;
693+
const auto xrt_smi_config_type = std::any_cast<xrt_core::query::xrt_smi_config::type>(reqType);
694+
switch (xrt_smi_config_type) {
695+
case xrt_core::query::xrt_smi_config::type::options_config:
696+
xrt_smi_config = shim_edge::smi::get_smi_config();
697+
break;
698+
default:
699+
throw xrt_core::query::no_such_key(key, "Not implemented");
700+
}
701+
702+
return xrt_smi_config;
703+
}
704+
};
705+
681706
struct asm_counter
682707
{
683708
using result_type = query::asm_counter::result_type;
@@ -1058,6 +1083,7 @@ initialize_query_table()
10581083
emplace_func4_request<query::aim_counter, aim_counter>();
10591084
emplace_func4_request<query::am_counter, am_counter>();
10601085
emplace_func4_request<query::asm_counter, asm_counter>();
1086+
emplace_func4_request<query::xrt_smi_config, xrt_smi_config>();
10611087
emplace_func4_request<query::lapc_status, lapc_status>();
10621088
emplace_func4_request<query::spc_status, spc_status>();
10631089
emplace_func4_request<query::accel_deadlock_status, accel_deadlock_status>();

0 commit comments

Comments
 (0)