From e4f0d20381154c329a62ab2d8b09b109253dacfc Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 22 Jan 2025 16:28:26 +0100 Subject: [PATCH 1/7] [SYCL] Extend scope of scheduler bypass to safe to bypass events 1) Aling scheduler bypass conditions for CG and for memory ops. 2) Connect an event returned from scheduler bypass case with dependent events. --- sycl/source/detail/queue_impl.cpp | 10 ++++++++++ sycl/source/handler.cpp | 11 +++++++---- sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp | 6 ++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index dd29a5030f9cb..de8aa37a59e31 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -489,6 +489,16 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, EventImpl); EventImpl->setHandle(UREvent); EventImpl->setEnqueued(); + // connect returned event with dependent events + if (!isInOrder()) { + std::vector ExpandedDepEventImplPtrs; + ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); + for (const event &DepEvent : ExpandedDepEvents) + ExpandedDepEventImplPtrs.push_back(detail::getSyclObjImpl(DepEvent)); + + EventImpl->getPreparedDepsEvents() = ExpandedDepEventImplPtrs; + EventImpl->cleanDepEventsThroughOneLevel(); + } } if (isInOrder()) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1b774b0ab34ad..8027179c6dd3a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -468,10 +468,8 @@ event handler::finalize() { if (MQueue && !impl->MGraph && !impl->MSubgraphNode && !MQueue->getCommandGraph() && !impl->CGData.MRequirements.size() && !MStreamStorage.size() && - (!impl->CGData.MEvents.size() || - (MQueue->isInOrder() && - detail::Scheduler::areEventsSafeForSchedulerBypass( - impl->CGData.MEvents, MQueue->getContextImplPtr())))) { + detail::Scheduler::areEventsSafeForSchedulerBypass( + impl->CGData.MEvents, MQueue->getContextImplPtr())) { // if user does not add a new dependency to the dependency graph, i.e. // the graph is not changed, then this faster path is used to submit // kernel bypassing scheduler and avoiding CommandGroup, Command objects @@ -544,6 +542,11 @@ event handler::finalize() { EnqueueKernel(); NewEvent->setEnqueued(); + // connect returned event with dependent events + if (!MQueue->isInOrder()) { + NewEvent->getPreparedDepsEvents() = impl->CGData.MEvents; + NewEvent->cleanDepEventsThroughOneLevel(); + } MLastEvent = detail::createSyclObjFromImpl(std::move(NewEvent)); } diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 9366d63838d08..9a3145e92b733 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -332,6 +332,8 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { ASSERT_NE(Cmd, nullptr); Cmd->MIsBlockable = true; Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; + // for the test functionality, depenent task HostTaskEvent must be treated as incompleted + HostTaskEventImpl->setStateIncomplete(); auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(HostTaskEvent); @@ -341,6 +343,8 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { detail::getSyclObjImpl(SingleTaskEvent); EXPECT_EQ(SingleTaskEventImpl->getHandle(), nullptr); + // make HostTaskEvent completed, so SingleTaskEvent can be enqueued + HostTaskEventImpl->setComplete(); Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueSuccess; EventsInWaitList.clear(); @@ -375,6 +379,7 @@ TEST_F(DependsOnTests, BarrierWithWaitList) { ASSERT_NE(Cmd, nullptr); Cmd->MIsBlockable = true; Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; + HostTaskEventImpl->setStateIncomplete(); auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(HostTaskEvent); @@ -384,6 +389,7 @@ TEST_F(DependsOnTests, BarrierWithWaitList) { detail::getSyclObjImpl(SingleTaskEvent); EXPECT_EQ(SingleTaskEventImpl->getHandle(), nullptr); + HostTaskEventImpl->setComplete(); Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueSuccess; EventsInWaitList.clear(); From d8220c243515f23bdd530ec48b2d794e4a4f5999 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 22 Jan 2025 16:53:13 +0100 Subject: [PATCH 2/7] Fix code formatting. --- sycl/source/detail/queue_impl.cpp | 3 ++- sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index de8aa37a59e31..b15f746abd3db 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -494,7 +494,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, std::vector ExpandedDepEventImplPtrs; ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); for (const event &DepEvent : ExpandedDepEvents) - ExpandedDepEventImplPtrs.push_back(detail::getSyclObjImpl(DepEvent)); + ExpandedDepEventImplPtrs.push_back( + detail::getSyclObjImpl(DepEvent)); EventImpl->getPreparedDepsEvents() = ExpandedDepEventImplPtrs; EventImpl->cleanDepEventsThroughOneLevel(); diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 9a3145e92b733..29e0548945d66 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -332,7 +332,8 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { ASSERT_NE(Cmd, nullptr); Cmd->MIsBlockable = true; Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; - // for the test functionality, depenent task HostTaskEvent must be treated as incompleted + // for the test functionality, depenent task HostTaskEvent must be treated as + // incompleted HostTaskEventImpl->setStateIncomplete(); auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { From cd38e7d74d66dbfa8e90ddc56bbdcf669c08821c Mon Sep 17 00:00:00 2001 From: Alexandr-Konovalov Date: Wed, 22 Jan 2025 19:10:12 +0100 Subject: [PATCH 3/7] Update sycl/source/detail/queue_impl.cpp Co-authored-by: Sergey Semenov --- sycl/source/detail/queue_impl.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index b15f746abd3db..a7125682fd074 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -491,13 +491,12 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, EventImpl->setEnqueued(); // connect returned event with dependent events if (!isInOrder()) { - std::vector ExpandedDepEventImplPtrs; + std::vector &ExpandedDepEventImplPtrs = EventImpl->getPreparedDepsEvents(); ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); for (const event &DepEvent : ExpandedDepEvents) ExpandedDepEventImplPtrs.push_back( detail::getSyclObjImpl(DepEvent)); - - EventImpl->getPreparedDepsEvents() = ExpandedDepEventImplPtrs; + EventImpl->cleanDepEventsThroughOneLevel(); } } From a4f857add54cbd7c38588bb7d3321dff38b8d2d8 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 29 Jan 2025 12:53:22 +0100 Subject: [PATCH 4/7] Fix test-e2e. --- sycl/test-e2e/XPTI/basic_event_collection_linux.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 0e9be2ed889fb..4dfe5928bd5ee 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -32,12 +32,6 @@ // CHECK-DAG: from_source : false // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} -// CHECK: Node create -// CHECK-DAG: queue_id : {{.*}} -// CHECK-DAG: kernel_name : virtual_node[{{.*}}] -// CHECK-NEXT: Edge create -// CHECK-DAG: queue_id : {{.*}} -// CHECK-DAG: event : {{.*}} // CHECK: Task begin // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} From 9227c426151ab7b8148f37d1fb67704dd552d16a Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 29 Jan 2025 12:53:47 +0100 Subject: [PATCH 5/7] Fix code formatting. --- sycl/source/detail/queue_impl.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index a7125682fd074..37d899a621971 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -491,12 +491,13 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, EventImpl->setEnqueued(); // connect returned event with dependent events if (!isInOrder()) { - std::vector &ExpandedDepEventImplPtrs = EventImpl->getPreparedDepsEvents(); + std::vector &ExpandedDepEventImplPtrs = + EventImpl->getPreparedDepsEvents(); ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); for (const event &DepEvent : ExpandedDepEvents) ExpandedDepEventImplPtrs.push_back( detail::getSyclObjImpl(DepEvent)); - + EventImpl->cleanDepEventsThroughOneLevel(); } } From 13823934e92609b46a3998032a7440252e80ebdb Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 29 Jan 2025 14:04:51 +0100 Subject: [PATCH 6/7] Small fixes. --- sycl/source/detail/queue_impl.cpp | 2 +- sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 37d899a621971..1d536fd04961f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -505,7 +505,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, if (isInOrder()) { auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; - EventToStoreIn = EventImpl; + EventToStoreIn = std::move(EventImpl); } // Track only if we won't be able to handle it with urQueueFinish. if (MEmulateOOO) diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 29e0548945d66..8c5bd97eb2ae6 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -323,6 +323,7 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { &redefinedextUSMEnqueueMemcpy); sycl::queue Queue = detail::createSyclObjFromImpl(QueueDevImpl); + // Mock up an incomplete host task auto HostTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.host_task([=]() {}); }); std::shared_ptr HostTaskEventImpl = @@ -332,8 +333,6 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { ASSERT_NE(Cmd, nullptr); Cmd->MIsBlockable = true; Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; - // for the test functionality, depenent task HostTaskEvent must be treated as - // incompleted HostTaskEventImpl->setStateIncomplete(); auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { From 6aebb446c2fedbb4a1cf9d50ff3efa160711de31 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Thu, 6 Feb 2025 17:27:29 +0100 Subject: [PATCH 7/7] Eliminate excessive copying. --- sycl/source/detail/queue_impl.cpp | 4 ++-- sycl/source/detail/scheduler/scheduler.cpp | 4 ++-- sycl/source/detail/scheduler/scheduler.hpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 1d536fd04961f..559fbc5da26c5 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -481,7 +481,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, } event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); - auto EventImpl = detail::getSyclObjImpl(ResEvent); + const auto &EventImpl = detail::getSyclObjImpl(ResEvent); { NestedCallsTracker tracker; ur_event_handle_t UREvent = nullptr; @@ -505,7 +505,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, if (isInOrder()) { auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; - EventToStoreIn = std::move(EventImpl); + EventToStoreIn = EventImpl; } // Track only if we won't be able to handle it with urQueueFinish. if (MEmulateOOO) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 77a066d283453..856cbdc3cca14 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -670,7 +670,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, } bool Scheduler::areEventsSafeForSchedulerBypass( - const std::vector &DepEvents, ContextImplPtr Context) { + const std::vector &DepEvents, const ContextImplPtr &Context) { return std::all_of( DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) { @@ -680,7 +680,7 @@ bool Scheduler::areEventsSafeForSchedulerBypass( } bool Scheduler::areEventsSafeForSchedulerBypass( - const std::vector &DepEvents, ContextImplPtr Context) { + const std::vector &DepEvents, const ContextImplPtr &Context) { return std::all_of(DepEvents.begin(), DepEvents.end(), [&Context](const EventImplPtr &SyclEventImplPtr) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index c6d2d07600d12..f3ce947b32e5d 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -479,10 +479,10 @@ class Scheduler { static bool areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context); + const ContextImplPtr &Context); static bool areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context); + const ContextImplPtr &Context); protected: using RWLockT = std::shared_timed_mutex;