@@ -57,6 +57,10 @@ inline const char *nodeTypeToString(node_type NodeType) {
57
57
return " host_task" ;
58
58
case node_type::native_command:
59
59
return " native_command" ;
60
+ case node_type::async_malloc:
61
+ return " async_malloc" ;
62
+ case node_type::async_free:
63
+ return " async_free" ;
60
64
}
61
65
assert (false && " Unhandled node type" );
62
66
return {};
@@ -340,7 +344,7 @@ graph_impl::graph_impl(const sycl::context &SyclContext,
340
344
const sycl::device &SyclDevice,
341
345
const sycl::property_list &PropList)
342
346
: MContext(SyclContext), MDevice(SyclDevice), MRecordingQueues(),
343
- MEventsMap (), MInorderQueueMap(),
347
+ MEventsMap (), MInorderQueueMap(), MGraphMemPool(SyclContext, SyclDevice),
344
348
MID(NextAvailableID.fetch_add(1 , std::memory_order_relaxed)) {
345
349
checkGraphPropertiesAndThrow (PropList);
346
350
if (PropList.has_property <property::graph::no_cycle_check>()) {
@@ -750,12 +754,12 @@ void graph_impl::beginRecording(
750
754
}
751
755
}
752
756
753
- // Check if nodes are empty and if so loop back through predecessors until we
754
- // find the real dependency.
757
+ // Check if nodes do not require enqueueing and if so loop back through
758
+ // predecessors until we find the real dependency.
755
759
void exec_graph_impl::findRealDeps (
756
760
std::vector<ur_exp_command_buffer_sync_point_t > &Deps,
757
761
std::shared_ptr<node_impl> CurrentNode, int ReferencePartitionNum) {
758
- if (CurrentNode->isEmpty ()) {
762
+ if (! CurrentNode->requiresEnqueue ()) {
759
763
for (auto &N : CurrentNode->MPredecessors ) {
760
764
auto NodeImpl = N.lock ();
761
765
findRealDeps (Deps, NodeImpl, ReferencePartitionNum);
@@ -875,9 +879,9 @@ void exec_graph_impl::createCommandBuffers(
875
879
Partition->MCommandBuffers [Device] = OutCommandBuffer;
876
880
877
881
for (const auto &Node : Partition->MSchedule ) {
878
- // Empty nodes are not processed as other nodes, but only their
882
+ // Some nodes are not scheduled like other nodes, and only their
879
883
// dependencies are propagated in findRealDeps
880
- if (Node->isEmpty ())
884
+ if (! Node->requiresEnqueue ())
881
885
continue ;
882
886
883
887
sycl::detail::CGType type = Node->MCGType ;
@@ -943,6 +947,8 @@ exec_graph_impl::exec_graph_impl(sycl::context Context,
943
947
944
948
exec_graph_impl::~exec_graph_impl () {
945
949
try {
950
+ MGraphImpl->markExecGraphDestroyed ();
951
+
946
952
const sycl::detail::AdapterPtr &Adapter =
947
953
sycl::detail::getSyclObjImpl (MContext)->getAdapter ();
948
954
MSchedule.clear ();
@@ -952,6 +958,9 @@ exec_graph_impl::~exec_graph_impl() {
952
958
Event->wait (Event);
953
959
}
954
960
961
+ // Clean up any graph-owned allocations that were allocated
962
+ MGraphImpl->getMemPool ().deallocateAndUnmapAll ();
963
+
955
964
for (const auto &Partition : MPartitions) {
956
965
Partition->MSchedule .clear ();
957
966
for (const auto &Iter : Partition->MCommandBuffers ) {
@@ -1870,6 +1879,14 @@ modifiable_command_graph::finalize(const sycl::property_list &PropList) const {
1870
1879
// Graph is read and written in this scope so we lock
1871
1880
// this graph with full privileges.
1872
1881
graph_impl::WriteLock Lock (impl->MMutex );
1882
+ // If the graph uses graph-owned allocations and an executable graph already
1883
+ // exists we must throw an error.
1884
+ if (impl->getMemPool ().hasAllocations () && impl->getExecGraphCount () > 0 ) {
1885
+ throw sycl::exception (sycl::make_error_code (errc::invalid),
1886
+ " Graphs containing allocations can only have a "
1887
+ " single executable graph alive at any onc time." );
1888
+ }
1889
+
1873
1890
return command_graph<graph_state::executable>{
1874
1891
this ->impl , this ->impl ->getContext (), PropList};
1875
1892
}
@@ -1997,11 +2014,16 @@ executable_command_graph::executable_command_graph(
1997
2014
const property_list &PropList)
1998
2015
: impl(std::make_shared<detail::exec_graph_impl>(Ctx, Graph, PropList)) {
1999
2016
finalizeImpl (); // Create backend representation for executable graph
2017
+ // Mark that we have created an executable graph from the modifiable graph.
2018
+ Graph->markExecGraphCreated ();
2000
2019
}
2001
2020
2002
2021
void executable_command_graph::finalizeImpl () {
2003
2022
impl->makePartitions ();
2004
2023
2024
+ // Handle any work required for graph-owned memory allocations
2025
+ impl->finalizeMemoryAllocations ();
2026
+
2005
2027
auto Device = impl->getGraphImpl ()->getDevice ();
2006
2028
for (auto Partition : impl->getPartitions ()) {
2007
2029
if (!Partition->isHostTask ()) {
@@ -2029,6 +2051,13 @@ void executable_command_graph::update(const std::vector<node> &Nodes) {
2029
2051
impl->update (NodeImpls);
2030
2052
}
2031
2053
2054
+ size_t executable_command_graph::get_required_mem_size () const {
2055
+ // Since each graph has a unique mem pool, return the current memory usage for
2056
+ // now. This call may change if we move to being able to share memory between
2057
+ // unique graphs.
2058
+ return impl->getGraphImpl ()->getMemPool ().getMemUseCurrent ();
2059
+ }
2060
+
2032
2061
dynamic_parameter_base::dynamic_parameter_base (
2033
2062
command_graph<graph_state::modifiable> Graph)
2034
2063
: impl(std::make_shared<dynamic_parameter_impl>(
0 commit comments