diff --git a/src/runtime_src/core/include/xrt/xrt_aie.h b/src/runtime_src/core/include/xrt/xrt_aie.h index f1c986a9608..02db079465c 100644 --- a/src/runtime_src/core/include/xrt/xrt_aie.h +++ b/src/runtime_src/core/include/xrt/xrt_aie.h @@ -465,6 +465,7 @@ class buffer : public detail::pimpl * * This constructor initializes a buffer object with the specified device, xclbin UUID, and string identifier. This throws an exception if no GMIO/External buffer exists with given name */ + [[deprecated("deprecated, please use buffer(hw_context, name) instead")]] buffer(const xrt::device& device, const xrt::uuid& uuid, const std::string& name); /** diff --git a/src/runtime_src/core/include/xrt/xrt_device.h b/src/runtime_src/core/include/xrt/xrt_device.h index 8255dc0868b..9a4a442b7d9 100644 --- a/src/runtime_src/core/include/xrt/xrt_device.h +++ b/src/runtime_src/core/include/xrt/xrt_device.h @@ -342,6 +342,7 @@ class device * @return * UUID of argument xclbin */ + [[deprecated("deprecated, please use hw_context() instead")]] XCL_DRIVER_DLLESPEC uuid load_xclbin(const axlf* xclbin); @@ -358,6 +359,7 @@ class device * the xclbin. Using this function allows one time * allocation of data that needs to be kept in memory. */ + [[deprecated("deprecated, please use hw_context() instead")]] XCL_DRIVER_DLLESPEC uuid load_xclbin(const std::string& xclbin_fnm); @@ -374,6 +376,7 @@ class device * caller. The xrt::xclbin object must contain the complete axlf * structure. */ + [[deprecated("deprecated, please use hw_context() instead")]] XCL_DRIVER_DLLESPEC uuid load_xclbin(const xrt::xclbin& xclbin); diff --git a/src/runtime_src/core/include/xrt/xrt_graph.h b/src/runtime_src/core/include/xrt/xrt_graph.h index ff7d28139bd..3e5b9da0918 100644 --- a/src/runtime_src/core/include/xrt/xrt_graph.h +++ b/src/runtime_src/core/include/xrt/xrt_graph.h @@ -64,6 +64,7 @@ class graph * @param am * Open the graph with specified access (default primary) */ + [[deprecated("deprecated, please use graph(hw_context, name) instead")]] graph(const xrt::device& device, const xrt::uuid& xclbin_id, const std::string& name, access_mode am = access_mode::primary); diff --git a/src/runtime_src/core/tools/common/tests/TestAiePl.cpp b/src/runtime_src/core/tools/common/tests/TestAiePl.cpp index d1e94b66680..40ea136332d 100644 --- a/src/runtime_src/core/tools/common/tests/TestAiePl.cpp +++ b/src/runtime_src/core/tools/common/tests/TestAiePl.cpp @@ -15,8 +15,10 @@ namespace XBU = XBUtilities; // XRT includes #include "xrt/experimental/xrt_system.h" +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" #include @@ -42,7 +44,7 @@ TestAiePl::run(const std::shared_ptr& dev) return ptree; } -bool run_pl_controller_aie1(xrt::device device, xrt::uuid uuid, boost::property_tree::ptree& aie_meta, std::string dma_lock) { +bool run_pl_controller_aie1(xrt::hw_context hw_ctx, boost::property_tree::ptree& aie_meta, std::string dma_lock) { xf::plctrl::plController m_pl_ctrl(aie_meta, dma_lock.c_str()); unsigned int num_iter = 2; @@ -83,17 +85,17 @@ bool run_pl_controller_aie1(xrt::device device, xrt::uuid uuid, boost::property_ unsigned int mem_size_bytes = 0; auto sender_receiver_k1 = - xrt::kernel(device, uuid, "sender_receiver:{sender_receiver_1}"); + xrt::kernel(hw_ctx, "sender_receiver:{sender_receiver_1}"); auto controller_k1 = - xrt::kernel(device, uuid, "pl_controller_kernel:{controller_1}"); + xrt::kernel(hw_ctx, "pl_controller_kernel:{controller_1}"); // output memory mem_size_bytes = num_sample * num_iter * sizeof(uint32_t); - auto out_bo1 = xrt::bo(device, mem_size_bytes, sender_receiver_k1.group_id(output_buffer_idx)); + auto out_bo1 = xrt::bo(hw_ctx, mem_size_bytes, sender_receiver_k1.group_id(output_buffer_idx)); auto host_out1 = out_bo1.map(); // input memory - auto in_bo1 = xrt::bo(device, mem_size_bytes, sender_receiver_k1.group_id(input_buffer_idx)); + auto in_bo1 = xrt::bo(hw_ctx, mem_size_bytes, sender_receiver_k1.group_id(input_buffer_idx)); auto host_in1 = in_bo1.map(); @@ -104,7 +106,7 @@ bool run_pl_controller_aie1(xrt::device device, xrt::uuid uuid, boost::property_ in_bo1.sync(XCL_BO_SYNC_BO_TO_DEVICE, mem_size_bytes, /*OFFSET=*/0); uint32_t num_pm = m_pl_ctrl.get_microcode_size(); /// sizeof(int32_t); - auto pm_bo = xrt::bo(device, (num_pm + 1) * sizeof(uint32_t), + auto pm_bo = xrt::bo(hw_ctx, (num_pm + 1) * sizeof(uint32_t), controller_k1.group_id(pm_buffer_idx)); auto host_pm = pm_bo.map(); @@ -145,7 +147,7 @@ bool run_pl_controller_aie1(xrt::device device, xrt::uuid uuid, boost::property_ return match; } -bool run_pl_controller_aie2(xrt::device device, xrt::uuid uuid, boost::property_tree::ptree& aie_meta) { +bool run_pl_controller_aie2(xrt::hw_context hw_ctx, boost::property_tree::ptree& aie_meta) { // instance of plController xf::plctrl::plController_aie2 m_pl_ctrl(aie_meta); @@ -170,16 +172,16 @@ bool run_pl_controller_aie2(xrt::device device, xrt::uuid uuid, boost::property_ uint32_t mem_size_bytes = 0; // XRT auto get group_id - auto sender_receiver_k1 = xrt::kernel(device, uuid, "sender_receiver:{sender_receiver_1}"); - auto controller_k1 = xrt::kernel(device, uuid, "pl_controller_top:{controller_1}"); + auto sender_receiver_k1 = xrt::kernel(hw_ctx, "sender_receiver:{sender_receiver_1}"); + auto controller_k1 = xrt::kernel(hw_ctx, "pl_controller_top:{controller_1}"); // output memory mem_size_bytes = num_sample * num_iter * sizeof(uint32_t); - auto out_bo1 = xrt::bo(device, mem_size_bytes, sender_receiver_k1.group_id(output_buffer_idx)); + auto out_bo1 = xrt::bo(hw_ctx, mem_size_bytes, sender_receiver_k1.group_id(output_buffer_idx)); auto host_out1 = out_bo1.map(); // input memory - auto in_bo1 = xrt::bo(device, mem_size_bytes, sender_receiver_k1.group_id(input_buffer_idx)); + auto in_bo1 = xrt::bo(hw_ctx, mem_size_bytes, sender_receiver_k1.group_id(input_buffer_idx)); auto host_in1 = in_bo1.map(); // initialize input memory @@ -190,7 +192,7 @@ bool run_pl_controller_aie2(xrt::device device, xrt::uuid uuid, boost::property_ in_bo1.sync(XCL_BO_SYNC_BO_TO_DEVICE, mem_size_bytes, /*OFFSET=*/0); uint32_t num_pm = m_pl_ctrl.get_microcode_size(); /// sizeof(uint32_t); - auto pm_bo = xrt::bo(device, (num_pm + 1) * sizeof(uint32_t), + auto pm_bo = xrt::bo(hw_ctx, (num_pm + 1) * sizeof(uint32_t), controller_k1.group_id(pm_buffer_idx)); auto host_pm = pm_bo.map(); @@ -251,7 +253,9 @@ TestAiePl::runTest(const std::shared_ptr& dev, boost::property } ptree.put("xclbin_directory", std::filesystem::path(test_path)); - const auto uuid = device.load_xclbin(binaryFile.string()); + auto xclbin = xrt::xclbin(binaryFile.string()); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); boost::property_tree::ptree aie_meta; auto metadata_pair = dev->get_axlf_section(AIE_METADATA); @@ -279,11 +283,11 @@ TestAiePl::runTest(const std::shared_ptr& dev, boost::property case 1: { std::string dma_lock_file = "dma_lock_report.json"; auto dma_lock = std::filesystem::path(test_path) / dma_lock_file; - match = run_pl_controller_aie1(device, uuid, aie_meta, dma_lock.string()); + match = run_pl_controller_aie1(hw_ctx, aie_meta, dma_lock.string()); break; } case 2: - match = run_pl_controller_aie2(device, uuid, aie_meta); + match = run_pl_controller_aie2(hw_ctx, aie_meta); break; default: XBValidateUtils::logger(ptree, "Error", "Unsupported AIE Hardware"); diff --git a/src/runtime_src/core/tools/common/tests/TestAiePs.cpp b/src/runtime_src/core/tools/common/tests/TestAiePs.cpp index d1850174d26..eabc6a8ef4f 100644 --- a/src/runtime_src/core/tools/common/tests/TestAiePs.cpp +++ b/src/runtime_src/core/tools/common/tests/TestAiePs.cpp @@ -7,8 +7,10 @@ #include "TestValidateUtilities.h" #include "tools/common/XBUtilities.h" #include "tools/common/XBUtilitiesCore.h" +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" namespace XBU = XBUtilities; @@ -54,7 +56,9 @@ TestAiePs::runTest(const std::shared_ptr& dev, boost::property ptree.put("status", XBValidateUtils::test_token_skipped); return; } - device.load_xclbin(path); + auto dep_xclbin = xrt::xclbin(path); + auto dep_uuid = device.register_xclbin(dep_xclbin); + xrt::hw_context dep_hw_ctx(device, dep_uuid); } const std::string b_file = XBValidateUtils::findXclbinPath(dev, ptree); @@ -70,9 +74,11 @@ TestAiePs::runTest(const std::shared_ptr& dev, boost::property const int input_size_allocated = ((input_size_in_bytes / 4096) + ((input_size_in_bytes % 4096) > 0)) * 4096; const int output_size_allocated = ((output_size_in_bytes / 4096) + ((output_size_in_bytes % 4096) > 0)) * 4096; - auto uuid = device.load_xclbin(b_file); - auto aie_kernel = xrt::kernel(device,uuid, "aie_kernel"); - auto out_bo= xrt::bo(device, output_size_allocated, aie_kernel.group_id(2)); + auto xclbin = xrt::xclbin(b_file); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); + auto aie_kernel = xrt::kernel(hw_ctx, "aie_kernel"); + auto out_bo= xrt::bo(hw_ctx, output_size_allocated, aie_kernel.group_id(2)); auto out_bomapped = out_bo.map(); memset(out_bomapped, 0, output_size_in_bytes); diff --git a/src/runtime_src/core/tools/common/tests/TestBandwidthKernel.cpp b/src/runtime_src/core/tools/common/tests/TestBandwidthKernel.cpp index 0361410f0ea..496d388025b 100644 --- a/src/runtime_src/core/tools/common/tests/TestBandwidthKernel.cpp +++ b/src/runtime_src/core/tools/common/tests/TestBandwidthKernel.cpp @@ -11,8 +11,10 @@ namespace XBU = XBUtilities; #include #include #include +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" #ifdef _WIN32 @@ -78,7 +80,7 @@ marshal_build_metadata(std::string test_path, unsigned int* num_kernel, unsigned } static std::vector -create_kernel_objects(xrt::device device, xrt::uuid xclbin_uuid, int num_kernel) +create_kernel_objects(xrt::hw_context hw_ctx, int num_kernel) { std::string krnl_name = "bandwidth"; std::vector krnls(num_kernel); @@ -90,7 +92,7 @@ create_kernel_objects(xrt::device device, xrt::uuid xclbin_uuid, int num_kernel) // compute unit. // For such case, this kernel object can only access the specific // Compute unit - krnls[i] = xrt::kernel(device, xclbin_uuid, krnl_name_full.c_str()); + krnls[i] = xrt::kernel(hw_ctx, krnl_name_full.c_str()); } return krnls; } @@ -145,7 +147,7 @@ calculate_throughput(std::chrono::time_point } static std::pair> -test_bandwidth_ddr(xrt::device device, std::vector krnls, int num_kernel_ddr) +test_bandwidth_ddr(xrt::hw_context hw_ctx, std::vector krnls, int num_kernel_ddr) { double max_throughput = 0; double mbpersec = 0; @@ -166,8 +168,8 @@ test_bandwidth_ddr(xrt::device device, std::vector krnls, int num_k // Creating Buffers for (int i = 0; i < num_kernel_ddr; i++) { - input_buffer[i] = xrt::bo(device, vector_size_bytes, krnls[i].group_id(0)); - output_buffer[i] = xrt::bo(device, vector_size_bytes, krnls[i].group_id(1)); + input_buffer[i] = xrt::bo(hw_ctx, vector_size_bytes, krnls[i].group_id(0)); + output_buffer[i] = xrt::bo(hw_ctx, vector_size_bytes, krnls[i].group_id(1)); } for (int i = 0; i < num_kernel_ddr; i++) { @@ -214,7 +216,7 @@ test_bandwidth_ddr(xrt::device device, std::vector krnls, int num_k } static double -test_bandwidth_hbm(xrt::device device, std::vector krnls, int num_kernel) +test_bandwidth_hbm(xrt::hw_context hw_ctx, std::vector krnls, int num_kernel) { double max_throughput = 0; double mbpersec = 0; @@ -232,8 +234,8 @@ test_bandwidth_hbm(xrt::device device, std::vector krnls, int num_k xrt::bo input_buffer, output_buffer; // Creating Buffers - input_buffer = xrt::bo(device, vector_size_bytes, krnls[num_kernel - 1].group_id(0)); - output_buffer = xrt::bo(device, vector_size_bytes, krnls[num_kernel - 1].group_id(1)); + input_buffer = xrt::bo(hw_ctx, vector_size_bytes, krnls[num_kernel - 1].group_id(0)); + output_buffer = xrt::bo(hw_ctx, vector_size_bytes, krnls[num_kernel - 1].group_id(1)); input_buffer.write(input_host.data()); input_buffer.sync(XCL_BO_SYNC_BO_TO_DEVICE); @@ -299,13 +301,15 @@ TestBandwidthKernel::runTest(const std::shared_ptr& dev, boost ptree.put("status", XBValidateUtils::test_token_skipped); return; } - auto xclbin_uuid = device.load_xclbin(b_file); + auto xclbin = xrt::xclbin(b_file); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); - std::vector krnls = create_kernel_objects(device, xclbin_uuid, num_kernel); + std::vector krnls = create_kernel_objects(hw_ctx, num_kernel); try { if (num_kernel_ddr) { - auto throughputs = test_bandwidth_ddr(device, krnls, num_kernel_ddr); + auto throughputs = test_bandwidth_ddr(hw_ctx, krnls, num_kernel_ddr); double max_throughput = throughputs.first; std::vector throughput_per_kernel = throughputs.second; XBValidateUtils::logger(ptree, "Details", boost::str(boost::format("Throughput (Type: DDR) (Bank count: %d) : %.1f MB/s") % num_kernel_ddr % max_throughput)); @@ -315,7 +319,7 @@ TestBandwidthKernel::runTest(const std::shared_ptr& dev, boost } } if (chk_hbm_mem) { - double max_throughput = test_bandwidth_hbm(device, krnls, num_kernel); + double max_throughput = test_bandwidth_hbm(hw_ctx, krnls, num_kernel); XBValidateUtils::logger(ptree, "Details", boost::str(boost::format("Throughput (Type: HBM) (Bank count: 1) : %.1f MB/s") % max_throughput)); } } catch (const std::runtime_error& e) { diff --git a/src/runtime_src/core/tools/common/tests/TestHostMemBandwidthKernel.cpp b/src/runtime_src/core/tools/common/tests/TestHostMemBandwidthKernel.cpp index 6fb9cedfef6..179b44e1581 100644 --- a/src/runtime_src/core/tools/common/tests/TestHostMemBandwidthKernel.cpp +++ b/src/runtime_src/core/tools/common/tests/TestHostMemBandwidthKernel.cpp @@ -11,8 +11,10 @@ namespace XBU = XBUtilities; #include #include #include +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" #ifdef _WIN32 @@ -97,13 +99,15 @@ TestHostMemBandwidthKernel::runTest(const std::shared_ptr& dev } std::string krnl_name = "bandwidth"; - xrt::uuid xclbin_uuid; + xrt::xclbin xclbin; if (retVal == EOPNOTSUPP) { krnl_name = "slavebridge"; - xclbin_uuid = device.load_xclbin(old_binary_file.string()); + xclbin = xrt::xclbin(old_binary_file.string()); } else { - xclbin_uuid = device.load_xclbin(b_file); + xclbin = xrt::xclbin(b_file); } + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); std::vector krnls(num_kernel); for (int i = 0; i < num_kernel; i++) { @@ -119,7 +123,7 @@ TestHostMemBandwidthKernel::runTest(const std::shared_ptr& dev // compute unit. // For such case, this kernel object can only access the specific // Compute unit - krnls[i] = xrt::kernel(device, xclbin_uuid, krnl_name_full.c_str()); + krnls[i] = xrt::kernel(hw_ctx, krnl_name_full.c_str()); } double max_throughput = 0; diff --git a/src/runtime_src/core/tools/common/tests/TestPsIops.cpp b/src/runtime_src/core/tools/common/tests/TestPsIops.cpp index 5a6e039a5c3..14d49fb596e 100644 --- a/src/runtime_src/core/tools/common/tests/TestPsIops.cpp +++ b/src/runtime_src/core/tools/common/tests/TestPsIops.cpp @@ -15,8 +15,10 @@ namespace XBU = XBUtilities; #include #include "ps_iops_util/xilutil.hpp" +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" #ifdef _WIN32 @@ -97,7 +99,7 @@ runThread(std::vector& cmds, unsigned int total, arg_t& arg) } static void -runTestThread(const xrt::device& device, const xrt::kernel& hello_world, +runTestThread(const xrt::hw_context& hw_ctx, const xrt::kernel& hello_world, arg_t& arg) { std::vector cmds; @@ -105,10 +107,10 @@ runTestThread(const xrt::device& device, const xrt::kernel& hello_world, for (int i = 0; i < arg.queueLength; i++) { auto run = xrt::run(hello_world); - auto bo0 = xrt::bo(device, DATA_SIZE, hello_world.group_id(0)); + auto bo0 = xrt::bo(hw_ctx, DATA_SIZE, hello_world.group_id(0)); run.set_arg(0, bo0); bos.push_back(std::move(bo0)); - auto bo1 = xrt::bo(device, DATA_SIZE, hello_world.group_id(1)); + auto bo1 = xrt::bo(hw_ctx, DATA_SIZE, hello_world.group_id(1)); run.set_arg(1, bo1); bos.push_back(std::move(bo1)); run.set_arg(2, COUNT); @@ -129,8 +131,10 @@ TestPsIops::testMultiThreads(const std::string& dev, const std::string& xclbin_f std::vector arg(threadNumber); xrt::device device(dev); - auto uuid = device.load_xclbin(xclbin_fn); - auto hello_world = xrt::kernel(device, uuid.get(), krnl.name); + auto xclbin = xrt::xclbin(xclbin_fn); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); + auto hello_world = xrt::kernel(hw_ctx, krnl.name); barrier.init(threadNumber + 1); @@ -138,7 +142,7 @@ TestPsIops::testMultiThreads(const std::string& dev, const std::string& xclbin_f arg[i].thread_id = i; arg[i].queueLength = queueLength; arg[i].total = total; - threads[i] = std::thread([&](int i){ runTestThread(device, hello_world, arg[i]); }, i); + threads[i] = std::thread([&](int i){ runTestThread(hw_ctx, hello_world, arg[i]); }, i); } /* Wait threads to prepare to start */ diff --git a/src/runtime_src/core/tools/common/tests/TestPsPlVerify.cpp b/src/runtime_src/core/tools/common/tests/TestPsPlVerify.cpp index 9f376df62cb..d8761dfea85 100644 --- a/src/runtime_src/core/tools/common/tests/TestPsPlVerify.cpp +++ b/src/runtime_src/core/tools/common/tests/TestPsPlVerify.cpp @@ -7,8 +7,10 @@ #include "TestValidateUtilities.h" #include "tools/common/XBUtilities.h" #include "tools/common/XBUtilitiesCore.h" +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" namespace XBU = XBUtilities; @@ -56,7 +58,9 @@ TestPsPlVerify::runTest(const std::shared_ptr& dev, boost::pro return; } - device.load_xclbin(path); + auto dep_xclbin = xrt::xclbin(path); + auto dep_uuid = device.register_xclbin(dep_xclbin); + xrt::hw_context dep_hw_ctx(device, dep_uuid); } // Load ps kernel onto device @@ -70,10 +74,12 @@ TestPsPlVerify::runTest(const std::shared_ptr& dev, boost::pro return; } - auto uuid = device.load_xclbin(b_file); - auto bandwidth_kernel = xrt::kernel(device, uuid, "bandwidth_kernel"); + auto xclbin = xrt::xclbin(b_file); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); + auto bandwidth_kernel = xrt::kernel(hw_ctx, "bandwidth_kernel"); - auto max_throughput_bo = xrt::bo(device, 4096, bandwidth_kernel.group_id(1)); + auto max_throughput_bo = xrt::bo(hw_ctx, 4096, bandwidth_kernel.group_id(1)); auto max_throughput = max_throughput_bo.map(); int reps = 10000; diff --git a/src/runtime_src/core/tools/common/tests/TestPsVerify.cpp b/src/runtime_src/core/tools/common/tests/TestPsVerify.cpp index d8ed4cec9e3..660ad7e7e44 100644 --- a/src/runtime_src/core/tools/common/tests/TestPsVerify.cpp +++ b/src/runtime_src/core/tools/common/tests/TestPsVerify.cpp @@ -9,7 +9,9 @@ #include "tools/common/XBUtilitiesCore.h" #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" +#include "xrt/experimental/xrt_xclbin.h" namespace XBU = XBUtilities; static const int COUNT = 1024; @@ -44,7 +46,9 @@ TestPsVerify::runTest(const std::shared_ptr& dev, boost::prope ptree.put("status", XBValidateUtils::test_token_skipped); return; } - device.load_xclbin(path); + auto dep_xclbin = xrt::xclbin(path); + auto dep_uuid = device.register_xclbin(dep_xclbin); + xrt::hw_context dep_hw_ctx(device, dep_uuid); } const std::string b_file = XBValidateUtils::findXclbinPath(dev, ptree); @@ -55,11 +59,13 @@ TestPsVerify::runTest(const std::shared_ptr& dev, boost::prope return; } - auto uuid = device.load_xclbin(b_file); - auto hello_world = xrt::kernel(device, uuid.get(), "hello_world"); + auto xclbin = xrt::xclbin(b_file); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); + auto hello_world = xrt::kernel(hw_ctx, "hello_world"); const size_t DATA_SIZE = COUNT * sizeof(int); - auto bo0 = xrt::bo(device, DATA_SIZE, hello_world.group_id(0)); - auto bo1 = xrt::bo(device, DATA_SIZE, hello_world.group_id(1)); + auto bo0 = xrt::bo(hw_ctx, DATA_SIZE, hello_world.group_id(0)); + auto bo1 = xrt::bo(hw_ctx, DATA_SIZE, hello_world.group_id(1)); auto bo0_map = bo0.map(); auto bo1_map = bo1.map(); std::fill(bo0_map, bo0_map + COUNT, 0); diff --git a/src/runtime_src/core/tools/common/tests/TestVerify.cpp b/src/runtime_src/core/tools/common/tests/TestVerify.cpp index 9698f15d321..6dce2113631 100644 --- a/src/runtime_src/core/tools/common/tests/TestVerify.cpp +++ b/src/runtime_src/core/tools/common/tests/TestVerify.cpp @@ -11,7 +11,9 @@ namespace XBU = XBUtilities; #include #include "xrt/xrt_bo.h" #include "xrt/xrt_device.h" +#include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" +#include "xrt/experimental/xrt_xclbin.h" static constexpr size_t buffer_size = 64; @@ -46,14 +48,16 @@ TestVerify::run(const std::shared_ptr& dev) ptree.put("status", XBValidateUtils::test_token_skipped); return ptree; } - auto xclbin_uuid = device.load_xclbin(b_file); + auto xclbin = xrt::xclbin(b_file); + auto uuid = device.register_xclbin(xclbin); + xrt::hw_context hw_ctx(device, uuid); xrt::kernel krnl; try { - krnl = xrt::kernel(device, xclbin_uuid, "verify"); + krnl = xrt::kernel(hw_ctx, "verify"); } catch (const std::exception&) { try { - krnl = xrt::kernel(device, xclbin_uuid, "hello"); + krnl = xrt::kernel(hw_ctx, "hello"); } catch (const std::exception&) { XBValidateUtils::logger(ptree, "Error", "Kernel could not be found."); ptree.put("status", XBValidateUtils::test_token_failed); @@ -62,7 +66,7 @@ TestVerify::run(const std::shared_ptr& dev) } // Allocate the output buffer to hold the kernel output - auto output_buffer = xrt::bo(device, sizeof(char) * buffer_size, krnl.group_id(0)); + auto output_buffer = xrt::bo(hw_ctx, sizeof(char) * buffer_size, krnl.group_id(0)); // Run the kernel and store its contents within the allocated output buffer auto run = krnl(output_buffer); diff --git a/src/runtime_src/xocl/core/device.cpp b/src/runtime_src/xocl/core/device.cpp index a3ed60863dd..a8d8fb56cf6 100644 --- a/src/runtime_src/xocl/core/device.cpp +++ b/src/runtime_src/xocl/core/device.cpp @@ -845,8 +845,12 @@ load_program(program* program) m_active = program; - // In order to use virtual CUs (KDMA) we must open a virtual context - m_xdevice->acquire_cu_context(-1,true); + /* COMMENTED OUT: acquire_cu_context(-1) allocates a second hw_ctx and locks + * the bitstream via xocl_add_context, while create_hw_context already did + * so. Double lock causes crash in icap_unlock_bitstream during teardown. + * Skip until proper fix for hw_context flow (e.g. reuse existing hw_ctx). + */ + /* m_xdevice->acquire_cu_context(-1,true); */ } void @@ -856,8 +860,13 @@ unload_program(const program* program) if (m_active == program) { clear_cus(); m_active = nullptr; - if (!m_parent.get()) - m_xdevice->release_cu_context(-1); // release virtual CU context + /* COMMENTED OUT: Matches acquire_cu_context(-1) above. Skip to avoid crash + * in icap_unlock_bitstream (double lock / ref-count mismatch). + */ + /* if (!m_parent.get()) + m_xdevice->release_cu_context(-1); + */ + } } diff --git a/src/runtime_src/xocl/core/device.h b/src/runtime_src/xocl/core/device.h index 13bd01fb6c0..3a5f46a9367 100644 --- a/src/runtime_src/xocl/core/device.h +++ b/src/runtime_src/xocl/core/device.h @@ -121,6 +121,12 @@ class device : public refcount, public _cl_device_id return m_xdevice->get_xrt_device(); } + xrt::hw_context + get_xrt_hwctx(const xrt_core::uuid& uuid) const + { + return m_xdevice->get_xrt_hwctx(uuid); + } + platform* get_platform() const { diff --git a/src/runtime_src/xocl/core/kernel.cpp b/src/runtime_src/xocl/core/kernel.cpp index 2d6d692820d..c19262a2561 100644 --- a/src/runtime_src/xocl/core/kernel.cpp +++ b/src/runtime_src/xocl/core/kernel.cpp @@ -161,7 +161,7 @@ kernel(program* prog, const std::string& name, xrt::xclbin::kernel xk) // Construct kernel run object for each device for (auto device: prog->get_device_range()) { - xrt::kernel xkernel(device->get_xrt_device(), prog->get_xclbin_uuid(device), name); + xrt::kernel xkernel(device->get_xrt_hwctx(prog->get_xclbin_uuid(device)), name); // The run object must limit the CUs to those of the OpenCL device, // which could be a sub-device. Since kernel is not tied to a particular diff --git a/src/runtime_src/xrt/device/device.h b/src/runtime_src/xrt/device/device.h index 35f1617c0b7..c353fad0224 100644 --- a/src/runtime_src/xrt/device/device.h +++ b/src/runtime_src/xrt/device/device.h @@ -169,6 +169,10 @@ class device : public xrt_device get_xrt_device() const { return m_hal->get_xrt_device(); } + xrt::hw_context + get_xrt_hwctx(const uuid& uuid) const + { return m_hal->get_xrt_hwctx(uuid); } + void acquire_cu_context(const uuid& uuid,size_t cuidx,bool shared) { m_hal->acquire_cu_context(uuid,cuidx,shared); } diff --git a/src/runtime_src/xrt/device/hal.h b/src/runtime_src/xrt/device/hal.h index f71d4530029..dbb1d42c4c2 100644 --- a/src/runtime_src/xrt/device/hal.h +++ b/src/runtime_src/xrt/device/hal.h @@ -181,6 +181,9 @@ class device virtual xrt::device get_xrt_device() const = 0; + virtual xrt::hw_context + get_xrt_hwctx(const uuid&) const = 0; + virtual std::shared_ptr get_core_device() const = 0; diff --git a/src/runtime_src/xrt/device/hal2.cpp b/src/runtime_src/xrt/device/hal2.cpp index d9352b716e0..578a8e0e18b 100644 --- a/src/runtime_src/xrt/device/hal2.cpp +++ b/src/runtime_src/xrt/device/hal2.cpp @@ -274,7 +274,9 @@ hal::operations_result device:: loadXclBin(const xclBin* xclbin) { - m_handle.load_xclbin(xclbin); + auto xclbin_obj = xrt::xclbin(reinterpret_cast(xclbin)); + auto uuid = m_handle.register_xclbin(xclbin_obj); + m_hw_contexts[uuid] = xrt::hw_context(m_handle, uuid); // refresh device info on successful load std::lock_guard lk(m_mutex); diff --git a/src/runtime_src/xrt/device/hal2.h b/src/runtime_src/xrt/device/hal2.h index 14119a6e1bb..04a6f342fba 100644 --- a/src/runtime_src/xrt/device/hal2.h +++ b/src/runtime_src/xrt/device/hal2.h @@ -8,6 +8,8 @@ #include "xrt/xrt_device.h" #include "xrt/xrt_bo.h" +#include "xrt/xrt_hw_context.h" +#include "xrt/experimental/xrt_xclbin.h" #include "xrt/detail/ert.h" #include "core/common/device.h" @@ -68,6 +70,12 @@ class device : public xrt_xocl::hal::device xrt::device m_handle; mutable boost::optional m_devinfo; + // In hwctx flow, we create hw contexts from xclbins on demand + // instead of explicitly loading the xclbins. The hwctx are cached + // here and accessors are provided so that xclbin references can be + // converted into the hwctx that has loaded the xclbin. + std::map m_hw_contexts; + mutable std::mutex m_mutex; struct ExecBufferObject : hal::exec_buffer_object @@ -190,6 +198,12 @@ class device : public xrt_xocl::hal::device return m_handle; } + xrt::hw_context + get_xrt_hwctx(const uuid& uuid) const override + { + return m_hw_contexts.at(uuid); + } + std::shared_ptr get_core_device() const override;