From e6dc6d9a13764beed96e93e28bb44aef78f22b98 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Tue, 18 Jun 2024 19:32:28 +0000 Subject: [PATCH] [L0] Make all API functions operating on queue virtual `ur_queue_handle_t_` is now an abstract class, and both the legacy and the new, optimized queue variants are expected to inherit from it and implement all of its virtual methods. API functions that operate on a queue are now auto-generated and only invoke the corresponding virtual function on the queue. --- scripts/generate_code.py | 43 ++ scripts/json2src.py | 3 + scripts/templates/helper.py | 32 ++ scripts/templates/queue_api.cpp.mako | 37 ++ scripts/templates/queue_api.hpp.mako | 31 + source/adapters/level_zero/CMakeLists.txt | 5 + source/adapters/level_zero/enqueue_native.cpp | 11 +- source/adapters/level_zero/event.cpp | 23 +- source/adapters/level_zero/image.cpp | 30 +- source/adapters/level_zero/kernel.cpp | 64 ++- source/adapters/level_zero/memory.cpp | 209 ++----- source/adapters/level_zero/queue.cpp | 70 ++- source/adapters/level_zero/queue.hpp | 217 ++++++- source/adapters/level_zero/queue_api.cpp | 322 +++++++++++ source/adapters/level_zero/queue_api.hpp | 153 +++++ .../adapters/level_zero/v2/queue_factory.hpp | 38 ++ .../v2/queue_immediate_in_order.cpp | 537 ++++++++++++++++++ .../v2/queue_immediate_in_order.hpp | 207 +++++++ 18 files changed, 1764 insertions(+), 268 deletions(-) create mode 100644 scripts/templates/queue_api.cpp.mako create mode 100644 scripts/templates/queue_api.hpp.mako create mode 100644 source/adapters/level_zero/queue_api.cpp create mode 100644 source/adapters/level_zero/queue_api.hpp create mode 100644 source/adapters/level_zero/v2/queue_factory.hpp create mode 100644 source/adapters/level_zero/v2/queue_immediate_in_order.cpp create mode 100644 source/adapters/level_zero/v2/queue_immediate_in_order.hpp diff --git a/scripts/generate_code.py b/scripts/generate_code.py index bdaa475a3e..5ff832945b 100644 --- a/scripts/generate_code.py +++ 
b/scripts/generate_code.py @@ -454,3 +454,46 @@ def generate_tools(path, section, namespace, tags, version, specs, meta): loc += _mako_info_hpp(infodir, namespace, tags, version, specs, meta) print("TOOLS Generated %s lines of code.\n" % loc) + +""" +Entry-point: + generates API functions that accept queue for level_zero +""" +def generate_level_zero_queue_api(path, section, namespace, tags, version, specs, meta): + template = "queue_api.cpp.mako" + fin = os.path.join("templates", template) + + name = "queue_api" + filename = "queue_api.cpp" + layer_dstpath = os.path.join(path, "adapters/level_zero") + os.makedirs(layer_dstpath, exist_ok=True) + fout = os.path.join(layer_dstpath, filename) + + print("Generating %s..." % fout) + + loc = util.makoWrite( + fin, fout, + ver=version, + name = name, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta) + + template = "queue_api.hpp.mako" + fin = os.path.join("templates", template) + + filename = "queue_api.hpp" + fout = os.path.join(layer_dstpath, filename) + + print("Generating %s..." 
% fout) + + loc += util.makoWrite( + fin, fout, + ver=version, + name = name, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta) + print("QUEUE Generated %s lines of code.\n" % loc) diff --git a/scripts/json2src.py b/scripts/json2src.py index d116e76426..df11f879ac 100755 --- a/scripts/json2src.py +++ b/scripts/json2src.py @@ -31,6 +31,7 @@ def add_argument(parser, name, help, default=False): add_argument(parser, "adapters", "generation of null adapter files.", True) add_argument(parser, "common", "generation of common files.", True) add_argument(parser, "tools", "generation of common files.", True) + add_argument(parser, "l0_queue", "generation of l0 queue abstractions.", True) parser.add_argument("--debug", action='store_true', help="dump intermediate data to disk.") parser.add_argument("--sections", type=list, default=None, help="Optional list of sections for which to generate source, default is all") parser.add_argument("--ver", type=str, default="1.0", help="specification version to generate.") @@ -60,6 +61,8 @@ def add_argument(parser, name, help, default=False): generate_code.generate_common(srcpath, config['name'], config['namespace'], config['tags'], args.ver, specs, input['meta']) if args.tools: generate_code.generate_tools(toolspath, config['name'], config['namespace'], config['tags'], args.ver, specs, input['meta']) + if args.l0_queue: + generate_code.generate_level_zero_queue_api(srcpath, config['name'], config['namespace'], config['tags'], args.ver, specs, input['meta']) if args.debug: util.makoFileListWrite("generated.json") diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index 0c90f4da8e..9b157c1259 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -1604,3 +1604,35 @@ def get_handle_create_get_retain_release_functions(specs, namespace, tags): records.append(record) return records + +""" +Public: + returns a list of objects representing functions that accept $x_queue_handle_t as a first 
param +""" +def get_queue_related_functions(specs, namespace, tags): + funcs = [] + for s in specs: + for obj in s['objects']: + if re.match(r"function", obj['type']): + if obj['params'] and obj['params'][0]['type'] == '$x_queue_handle_t': + funcs.append(obj) + return funcs + +""" +Public: + transform a queue related function using following rules: + - remove $x prefix + - make first letter lowercase + - remove first param (queue) +""" +def transform_queue_related_function_name(namespace, tags, obj, format = ["name", "type"]): + function_name = make_func_name(namespace, tags, obj).replace(namespace,'') + function_name=function_name[0].lower() + function_name[1:] + + if obj['params'][0]['type'] != '$x_queue_handle_t': + raise ValueError('First parameter is not a queue handle') + + params = make_param_lines(namespace, tags, obj, format=format) + params = params[1:] + + return "{}({})".format(function_name, ", ".join(params)) diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako new file mode 100644 index 0000000000..f941c7ba03 --- /dev/null +++ b/scripts/templates/queue_api.cpp.mako @@ -0,0 +1,37 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() +%>/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ${name}.cpp + * + */ + +#include "queue_api.hpp" + +ur_queue_handle_t_::~ur_queue_handle_t_() {} + +## FUNCTION ################################################################### +%for obj in th.get_queue_related_functions(specs, n, tags): +${X}_APIEXPORT ${x}_result_t ${X}_APICALL +${th.make_func_name(n, tags, obj)}( + %for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]): + ${line} + %endfor + ) +{ + return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; +} +%endfor diff --git a/scripts/templates/queue_api.hpp.mako b/scripts/templates/queue_api.hpp.mako new file mode 100644 index 0000000000..dcc86265f7 --- /dev/null +++ b/scripts/templates/queue_api.hpp.mako @@ -0,0 +1,31 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() +%>/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ${name}.hpp + * + */ + +#pragma once + +#include + +struct ur_queue_handle_t_ { + virtual ~ur_queue_handle_t_(); + %for obj in th.get_queue_related_functions(specs, n, tags): + virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0; + %endfor +}; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 5827452e01..59a8e19c78 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -110,8 +110,11 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_factory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp @@ -126,10 +129,12 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp ) if(NOT WIN32) diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp index b708333de7..b67cccc4f1 100644 --- a/source/adapters/level_zero/enqueue_native.cpp +++ b/source/adapters/level_zero/enqueue_native.cpp @@ -10,10 +10,11 @@ #include -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( - ur_queue_handle_t, 
ur_exp_enqueue_native_command_function_t, void *, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) { +#include "queue.hpp" + +ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp( + ur_exp_enqueue_native_command_function_t, void *, uint32_t, + const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 5881610f68..77c47d51aa 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -59,8 +59,8 @@ bool WaitListEmptyOrAllEventsFromSameQueue( return true; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of + ///< the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -72,7 +72,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; if (EventWaitList) { bool UseCopyEngine = false; @@ -152,8 +152,9 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? 
std::atoi(UrRet) : true); }(); -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t +ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the + ///< queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -165,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -661,8 +662,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( bool Blocking, ///< [in] blocking or non-blocking enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -676,7 +676,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( *OutEvent ///< [in,out] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1022,7 +1022,6 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { // Save pointer to the queue before deleting/resetting event. auto Queue = Legacy(Event->UrQueue); - auto URQueue = Event->UrQueue; // If the event was a timestamp recording, we try to evict its entry in the // queue. 
@@ -1054,8 +1053,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { // created so that we can avoid ur_queue_handle_t is released before the // associated ur_event_handle_t is released. Here we have to decrement it so // ur_queue_handle_t can be released successfully. - if (URQueue) { - UR_CALL(urQueueReleaseInternal(URQueue)); + if (Queue) { + UR_CALL(urQueueReleaseInternal(Queue)); } return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index e37b00a33e..b0e058ffe2 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -748,14 +748,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( - ur_queue_handle_t hUrQueue, void *pDst, void *pSrc, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, - ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, - ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto hQueue = Legacy(hUrQueue); +ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_exp_image_copy_flags_t imageCopyFlags, + ur_rect_offset_t srcOffset, ur_rect_offset_t dstOffset, + ur_rect_region_t copyExtent, ur_rect_region_t hostExtent, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + auto hQueue = this; std::scoped_lock Lock(hQueue->Mutex); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1028,11 +1028,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t 
UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, - bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList, +ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasValue, + uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = waitValue; @@ -1044,11 +1043,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, - bool hasValue, uint64_t signalValue, uint32_t numEventsInWaitList, +ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasValue, + uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = signalValue; diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index fa1b2f9192..a33e320cc4 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -101,8 +101,7 @@ ur_result_t getSuggestedLocalWorkSize(ur_queue_handle_legacy_t hQueue, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group 
work-items @@ -131,7 +130,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ze_kernel_handle_t ZeKernel{}; UR_CALL(getZeKernel(Queue, Kernel, &ZeKernel)); @@ -309,8 +308,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group work-items @@ -339,7 +337,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; auto ZeDevice = Queue->Device->ZeDevice; ze_kernel_handle_t ZeKernel{}; @@ -571,8 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue to submit to. +ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( ur_program_handle_t Program, ///< [in] handle of the program containing the ///< device global variable. const char @@ -593,7 +590,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Find global variable pointer @@ -621,29 +618,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue to submit to. - ur_program_handle_t Program, ///< [in] handle of the program containing the - ///< device global variable. - const char - *Name, ///< [in] the unique identifier for the device global variable. +ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( + ur_program_handle_t Program, ///< [in] handle of the program containing + ///< the device global variable. + const char *Name, ///< [in] the unique identifier for the device global + ///< variable. bool BlockingRead, ///< [in] indicates if this operation should block. size_t Count, ///< [in] the number of bytes to copy. - size_t Offset, ///< [in] the byte offset into the device global variable to - ///< start copying. - void *Dst, ///< [in] pointer to where the data must be copied to. + size_t Offset, ///< [in] the byte offset into the device global variable + ///< to start copying. + void *Dst, ///< [in] pointer to where the data must be copied to. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list. const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. + ///< pointer to a list of events that must be + ///< complete before the kernel execution. If + ///< nullptr, the numEventsInWaitList must be 0, + ///< indicating that no wait event. 
ur_event_handle_t - *Event ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. + *Event ///< [in,out][optional] return an event object that + ///< identifies this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock lock(Queue->Mutex); @@ -1206,3 +1202,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 24b9c53afb..95650a7b94 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -461,9 +461,8 @@ static ur_result_t enqueueMemImageCommandHelper( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes 
in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -480,7 +479,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Src = ur_cast(hBuffer); std::shared_lock SrcLock(Src->Mutex, std::defer_lock); @@ -496,9 +495,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( true /* PreferCopyEngine */); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -517,7 +515,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -534,9 +532,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( true /* PreferCopyEngine */); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region @@ -563,7 +560,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock); @@ -580,9 +577,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer @@ -611,7 +607,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -628,8 +624,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( ur_mem_handle_t BufferSrc, ///< [in] handle of the src buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object size_t SrcOffset, ///< [in] offset into hBufferSrc to begin copying from @@ -647,7 +642,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -680,8 +675,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t +ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the + ///< queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the source buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object ur_rect_offset_t SrcOrigin, ///< [in] 3D offset in the source buffer @@ -708,7 +704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -739,12 +735,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Buffer, ///< [in] handle of the buffer object - const void *Pattern, ///< [in] pointer to the fill pattern - size_t PatternSize, ///< [in] size in bytes of the pattern - size_t Offset, ///< [in] offset into the buffer +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( + ur_mem_handle_t Buffer, ///< [in] handle of the buffer object + const void *Pattern, ///< [in] pointer to the fill pattern + size_t PatternSize, ///< [in] size in bytes of the pattern + size_t Offset, ///< [in] offset into the buffer size_t Size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -758,7 +753,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); @@ -773,9 +768,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( Size, NumEventsInWaitList, EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in ///< the 1D, 2D, or 3D image @@ -796,7 +790,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -805,9 +799,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in @@ -829,7 +822,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -838,8 +831,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t +ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of + ///< the queue object ur_mem_handle_t ImageSrc, ///< [in] handle of the src image object ur_mem_handle_t ImageDst, ///< [in] handle of the dest image object ur_rect_offset_t SrcOrigin, ///< [in] defines the (x,y,z) offset in pixels @@ -860,7 +854,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); std::scoped_lock, ur_shared_mutex, ur_shared_mutex> @@ -878,9 +872,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Buf, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( + ur_mem_handle_t Buf, ///< [in] handle of the buffer object bool BlockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t MapFlags, ///< [in] flags for read, write, readwrite mapping size_t Offset, ///< [in] offset in bytes of the buffer region being mapped @@ -899,7 +892,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( void **RetMap ///< [in,out] return mapped pointer. TODO: move it before ///< numEventsInWaitList? 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; auto Buffer = ur_cast<_ur_buffer *>(Buf); UR_ASSERT(!Buffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -961,7 +954,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(UrQueue)); + UR_CALL(urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. std::scoped_lock Guard(Buffer->Mutex); @@ -1047,8 +1040,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( ur_mem_handle_t Mem, ///< [in] handle of the memory (buffer or image) object void *MappedPtr, ///< [in] mapped host address uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -1063,7 +1055,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; UR_ASSERT(!Mem->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); auto Buffer = ur_cast<_ur_buffer *>(Mem); @@ -1118,7 +1110,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(UrQueue)); + UR_CALL(urQueueFinish(Queue)); char *ZeHandleDst; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -1175,38 +1167,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemset( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - void *Ptr, ///< [in] pointer to USM memory object - int8_t ByteValue, ///< [in] byte value to fill - size_t Count, ///< [in] size in bytes to be set - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. - ur_event_handle_t *Event ///< [in,out][optional] return an event object that - ///< identifies this particular command instance. 
-) { - auto Queue = Legacy(UrQueue); - std::ignore = Queue; - std::ignore = Ptr; - std::ignore = ByteValue; - std::ignore = Count; - std::ignore = NumEventsInWaitList; - std::ignore = EventWaitList; - std::ignore = Event; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - bool Blocking, ///< [in] blocking or non-blocking copy +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( + bool Blocking, ///< [in] blocking or non-blocking copy void *Dst, ///< [in] pointer to the destination USM memory object const void *Src, ///< [in] pointer to the source USM memory object size_t Size, ///< [in] size in bytes to be copied @@ -1222,7 +1184,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Device to Device copies are found to execute slower on copy engine @@ -1238,8 +1200,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t Flags, ///< [in] USM prefetch flags @@ -1255,7 +1216,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::ignore = Flags; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1307,8 +1268,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t Advice, ///< [in] USM memory advice @@ -1316,7 +1276,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1366,9 +1326,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. - void *Mem, ///< [in] pointer to memory to be filled. +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( + void *Mem, ///< [in] pointer to memory to be filled. size_t Pitch, ///< [in] the total width of the destination memory including ///< padding. size_t PatternSize, ///< [in] the size in bytes of the pattern. @@ -1386,7 +1345,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. 
) { - std::ignore = Queue; std::ignore = Mem; std::ignore = Pitch; std::ignore = PatternSize; @@ -1401,41 +1359,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemset2D( - ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. - void *Mem, ///< [in] pointer to memory to be filled. - size_t Pitch, ///< [in] the total width of the destination memory including - ///< padding. - int Value, ///< [in] the value to fill into the region in pMem. - size_t Width, ///< [in] the width in bytes of each row to set. - size_t Height, ///< [in] the height of the columns to set. - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. - ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. -) { - std::ignore = Queue; - std::ignore = Mem; - std::ignore = Pitch; - std::ignore = Value; - std::ignore = Width; - std::ignore = Height; - std::ignore = NumEventsInWaitList; - std::ignore = EventWaitList; - std::ignore = OutEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue to submit to. +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( bool Blocking, ///< [in] indicates if this operation should block the host. void *Dst, ///< [in] pointer to memory where data will be copied. 
size_t DstPitch, ///< [in] the total width of the source memory including @@ -1456,7 +1380,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_rect_offset_t ZeroOffset{0, 0, 0}; ur_rect_region_t Region{Width, Height, 0}; @@ -2353,9 +2277,8 @@ size_t _ur_buffer::getAlignment() const { return Alignment; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - void *Ptr, ///< [in] pointer to USM memory object +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill( + void *Ptr, ///< [in] pointer to USM memory object size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a ///< power of 2 and less than or equal to width. const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. @@ -2371,7 +2294,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_event_handle_t *Event ///< [out][optional] return an event object that ///< identifies this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex); return enqueueMemFillHelper( @@ -2383,13 +2306,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } /// Host Pipes -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t UrQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - auto hQueue = Legacy(UrQueue); - std::ignore = hQueue; +ur_result_t ur_queue_handle_legacy_t_::enqueueReadHostPipe( + ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, + void *pDst, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { std::ignore = hProgram; std::ignore = pipe_symbol; std::ignore = blocking; @@ -2403,13 +2323,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t urEnqueueWriteHostPipe( - ur_queue_handle_t UrQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - auto hQueue = Legacy(UrQueue); - std::ignore = hQueue; +ur_result_t ur_queue_handle_legacy_t_::enqueueWriteHostPipe( + ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { std::ignore = hProgram; std::ignore = pipe_symbol; std::ignore = blocking; diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 769a321766..00e8c0f718 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -24,6 +24,8 @@ #include "ur_util.hpp" #include "ze_api.h" +#include 
"v2/queue_factory.hpp" + // Hard limit for the event completion batches. static const uint64_t CompletionBatchesMax = [] { // Default value chosen empirically to maximize the number of asynchronous @@ -342,8 +344,7 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::queueGetInfo( ur_queue_info_t ParamName, ///< [in] name of the queue property to query size_t ParamValueSize, ///< [in] size in bytes of the queue property value ///< provided @@ -351,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( size_t *ParamValueSizeRet ///< [out] size in bytes returned in queue ///< property value ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::shared_lock Lock(Queue->Mutex); UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); @@ -504,6 +505,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( UR_ASSERT(Context->isValidDevice(Device), UR_RESULT_ERROR_INVALID_DEVICE); + // optimized path for immediate, in-order command lists + if (v2::shouldUseQueueV2(Device, Flags)) { + *Queue = v2::createQueue(Context, Device, Flags); + return UR_RESULT_SUCCESS; + } + // Create placeholder queues in the compute queue group. // Actual L0 queues will be created at first use. std::vector ZeComputeCommandQueues( @@ -529,9 +536,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( nullptr); try { - *Queue = new ur_queue_handle_t_( - std::in_place_type, ZeComputeCommandQueues, - ZeCopyCommandQueues, Context, Device, true, Flags, ForceComputeIndex); + *Queue = new ur_queue_handle_legacy_t_(ZeComputeCommandQueues, + ZeCopyCommandQueues, Context, Device, + true, Flags, ForceComputeIndex); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) 
{ @@ -581,10 +588,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( - ur_queue_handle_t UrQueue ///< [in] handle of the queue object to get access -) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueRetain() { + auto Queue = this; + { std::scoped_lock Lock(Queue->Mutex); Queue->RefCountExternal++; @@ -593,10 +599,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( - ur_queue_handle_t UrQueue ///< [in] handle of the queue object to release -) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueRelease() { + auto Queue = this; std::vector EventListToCleanup; { @@ -687,17 +691,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( // (it was incremented when they were added to the command list). UR_CALL(urEventReleaseInternal(reinterpret_cast(Event))); } - UR_CALL(urQueueReleaseInternal(UrQueue)); + UR_CALL(urQueueReleaseInternal(Queue)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue. +ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( ur_queue_native_desc_t *Desc, ur_native_handle_t *NativeQueue ///< [out] a pointer to the native handle of the queue. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; + // Lock automatically releases when this goes out of scope. 
std::shared_lock lock(Queue->Mutex); @@ -800,9 +804,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector CopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( - std::in_place_type, ComputeQueues, - CopyQueues, Context, UrDevice, OwnNativeHandle, Flags); + ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -824,9 +827,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector ZeroCopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( - std::in_place_type, ZeQueues, - ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); + ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -839,10 +841,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( - ur_queue_handle_t UrQueue ///< [in] handle of the queue to be finished. -) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueFinish() { + auto Queue = this; if (Queue->UsingImmCmdLists) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock(Queue->Mutex); @@ -907,10 +907,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush( - ur_queue_handle_t UrQueue ///< [in] handle of the queue to be flushed. 
-) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueFlush() { + auto Queue = this; std::scoped_lock Lock(Queue->Mutex); return Queue->executeAllOpenCommandLists(); } @@ -1576,9 +1574,7 @@ void ur_queue_handle_legacy_t_::clearEndTimeRecordings() { EndTimeRecordings.clear(); } -ur_result_t urQueueReleaseInternal(ur_queue_handle_t UrQueue) { - ur_queue_handle_legacy_t Queue = Legacy(UrQueue); - +ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) { if (!Queue->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1612,7 +1608,7 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t UrQueue) { Queue->CopyCommandBatch.NumTimesClosedFull, Queue->CopyCommandBatch.NumTimesClosedEarly); - delete UrQueue; + delete Queue; return UR_RESULT_SUCCESS; } @@ -1885,7 +1881,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, HostVisible.value(), Event, Queue->CounterBasedEventsEnabled)); - (*Event)->UrQueue = Queue->UnifiedHandle; + (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; (*Event)->IsDiscarded = IsInternal; (*Event)->IsMultiDevice = IsMultiDevice; diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index afd0d8975e..3759353783 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -26,20 +26,15 @@ #include "common.hpp" #include "device.hpp" +#include "queue_api.hpp" struct ur_queue_handle_legacy_t_; using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; extern "C" { -ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue); +ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue); } // extern "C" -namespace v2 { -struct ur_queue_dispatcher_t { - // TODO -}; -} // namespace v2 - struct ur_completion_batch; using ur_completion_batch_list = std::list; using ur_completion_batch_it = ur_completion_batch_list::iterator; @@ -233,7 +228,7 @@ using ur_command_list_map_t = // The iterator pointing 
to a specific command-list in use. using ur_command_list_ptr_t = ur_command_list_map_t::iterator; -struct ur_queue_handle_legacy_t_ : _ur_object { +struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { ur_queue_handle_legacy_t_( std::vector &ComputeQueues, std::vector &CopyQueues, @@ -241,6 +236,190 @@ struct ur_queue_handle_legacy_t_ : _ur_object { bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0, int ForceComputeIndex = -1); + ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) override; + ur_result_t queueRetain() override; + ur_result_t queueRelease() override; + ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) override; + ur_result_t queueFinish() override; + ur_result_t queueFlush() override; + ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, + const void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, 
bool blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, + size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, + size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite, + 
ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + void **ppRetMap) override; + ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill(void *pMem, size_t patternSize, + const void *pPattern, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t, + size_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t, + size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size, + ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMAdvise(const void *pMem, size_t size, + 
ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableWrite( + ur_program_handle_t hProgram, const char *name, bool blockingWrite, + size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableRead( + ur_program_handle_t hProgram, const char *name, bool blockingRead, + size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueWriteHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, + uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, + uint64_t signalValue, uint32_t numEventsInWaitList, + const 
ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, + uint32_t, const ur_mem_handle_t *, + const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; + using queue_type = ur_device_handle_t_::queue_group_info_t::type; // PI queue is in general a one to many mapping to L0 native queues. struct ur_queue_group_t { @@ -699,28 +878,12 @@ struct ur_queue_handle_legacy_t_ : _ur_object { // Threshold for cleaning up the EventList for immediate command lists. size_t getImmdCmmdListsEventCleanupThreshold(); - - // Pointer to the unified handle. - ur_queue_handle_t_ *UnifiedHandle; -}; - -// Unified handle that represents either legacy Queue or new dispatcher. -struct ur_queue_handle_t_ { - template - ur_queue_handle_t_(std::in_place_type_t tag, Args &&...args) - : Queue(tag, std::forward(args)...) 
{ - if constexpr (std::is_same_v) { - std::get(Queue).UnifiedHandle = this; - } - } - - std::variant Queue; }; -template QueueT *GetQueue(ur_queue_handle_t Queue) { +template QueueT GetQueue(ur_queue_handle_t Queue) { if (!Queue) return nullptr; - auto *Q = std::get_if(&Queue->Queue); + auto *Q = dynamic_cast(Queue); if (!Q) { throw UR_RESULT_ERROR_INVALID_QUEUE; } @@ -728,7 +891,7 @@ template QueueT *GetQueue(ur_queue_handle_t Queue) { } static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { - return GetQueue(Queue); + return GetQueue(Queue); } // This helper function creates a ur_event_handle_t and associate a diff --git a/source/adapters/level_zero/queue_api.cpp b/source/adapters/level_zero/queue_api.cpp new file mode 100644 index 0000000000..622000a07f --- /dev/null +++ b/source/adapters/level_zero/queue_api.cpp @@ -0,0 +1,322 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file queue_api.cpp
+ *
+ */
+
+#include "queue_api.hpp"
+
+// Defined out of line; the declaration lives in queue_api.hpp.
+ur_queue_handle_t_::~ur_queue_handle_t_() = default;
+
+// Every entry point below forwards its arguments, minus the queue handle, to
+// the virtual method of the same name on the queue object. The handle is
+// dereferenced without a null check.
+
+// --- Queue management -------------------------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName,
+               size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
+  auto &Queue = *hQueue;
+  return Queue.queueGetInfo(propName, propSize, pPropValue, pPropSizeRet);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) {
+  auto &Queue = *hQueue;
+  return Queue.queueRetain();
+}
+UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
+  auto &Queue = *hQueue;
+  return Queue.queueRelease();
+}
+UR_APIEXPORT ur_result_t UR_APICALL
+urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc,
+                       ur_native_handle_t *phNativeQueue) {
+  auto &Queue = *hQueue;
+  return Queue.queueGetNativeHandle(pDesc, phNativeQueue);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) {
+  auto &Queue = *hQueue;
+  return Queue.queueFinish();
+}
+UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) {
+  auto &Queue = *hQueue;
+  return Queue.queueFlush();
+}
+
+// --- Kernel launch and synchronisation --------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
+    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueKernelLaunch(hKernel, workDim, pGlobalWorkOffset,
+                                   pGlobalWorkSize, pLocalWorkSize,
+                                   numEventsInWaitList, phEventWaitList,
+                                   phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
+    ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueEventsWait(numEventsInWaitList, phEventWaitList,
+                                 phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
+    ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueEventsWaitWithBarrier(numEventsInWaitList,
+                                            phEventWaitList, phEvent);
+}
+
+// --- Buffer operations ------------------------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+    size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst,
+                                    numEventsInWaitList, phEventWaitList,
+                                    phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+    size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferWrite(hBuffer, blockingWrite, offset, size,
+                                     pSrc, numEventsInWaitList,
+                                     phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+    ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+    ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+    size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferReadRect(
+      hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch,
+      bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst,
+      numEventsInWaitList, phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+    ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+    ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+    size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferWriteRect(
+      hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch,
+      bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc,
+      numEventsInWaitList, phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+    ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset,
+                                    dstOffset, size, numEventsInWaitList,
+                                    phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+    ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin,
+    ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
+    size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferCopyRect(
+      hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch,
+      srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList,
+      phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern,
+    size_t patternSize, size_t offset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset,
+                                    size, numEventsInWaitList,
+                                    phEventWaitList, phEvent);
+}
+
+// --- Image operations -------------------------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead,
+    ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
+    size_t slicePitch, void *pDst, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemImageRead(hImage, blockingRead, origin, region,
+                                   rowPitch, slicePitch, pDst,
+                                   numEventsInWaitList, phEventWaitList,
+                                   phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite,
+    ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
+    size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemImageWrite(hImage, blockingWrite, origin, region,
+                                    rowPitch, slicePitch, pSrc,
+                                    numEventsInWaitList, phEventWaitList,
+                                    phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc,
+    ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin,
+    ur_rect_offset_t dstOrigin, ur_rect_region_t region,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin,
+                                   region, numEventsInWaitList,
+                                   phEventWaitList, phEvent);
+}
+
+// --- Mapping ----------------------------------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap,
+    ur_map_flags_t mapFlags, size_t offset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent, void **ppRetMap) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset,
+                                   size, numEventsInWaitList, phEventWaitList,
+                                   phEvent, ppRetMap);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList,
+                               phEventWaitList, phEvent);
+}
+
+// --- USM operations ---------------------------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
+    ur_queue_handle_t hQueue, void *pMem, size_t patternSize,
+    const void *pPattern, size_t size, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueUSMFill(pMem, patternSize, pPattern, size,
+                              numEventsInWaitList, phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
+    ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc,
+    size_t size, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueUSMMemcpy(blocking, pDst, pSrc, size,
+                                numEventsInWaitList, phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
+    ur_queue_handle_t hQueue, const void *pMem, size_t size,
+    ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList,
+                                  phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL
+urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
+                   ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueUSMAdvise(pMem, size, advice, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D(
+    ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize,
+    const void *pPattern, size_t width, size_t height,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width,
+                                height, numEventsInWaitList, phEventWaitList,
+                                phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D(
+    ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch,
+    const void *pSrc, size_t srcPitch, size_t width, size_t height,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch,
+                                  width, height, numEventsInWaitList,
+                                  phEventWaitList, phEvent);
+}
+
+// --- Device globals and host pipes ------------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
+    bool blockingWrite, size_t count, size_t offset, const void *pSrc,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueDeviceGlobalVariableWrite(
+      hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList,
+      phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
+    bool blockingRead, size_t count, size_t offset, void *pDst,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueDeviceGlobalVariableRead(
+      hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList,
+      phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram,
+    const char *pipe_symbol, bool blocking, void *pDst, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, size,
+                                   numEventsInWaitList, phEventWaitList,
+                                   phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram,
+    const char *pipe_symbol, bool blocking, void *pSrc, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc,
+                                    size, numEventsInWaitList, phEventWaitList,
+                                    phEvent);
+}
+
+// --- Experimental (Exp) entry points ----------------------------------------
+
+UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
+    ur_queue_handle_t hQueue, void *pDst, void *pSrc,
+    const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+    ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset,
+    ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent,
+    ur_rect_region_t hostExtent, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.bindlessImagesImageCopyExp(
+      pDst, pSrc, pImageFormat, pImageDesc, imageCopyFlags, srcOffset,
+      dstOffset, copyExtent, hostExtent, numEventsInWaitList, phEventWaitList,
+      phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp(
+    ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore,
+    bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.bindlessImagesWaitExternalSemaphoreExp(
+      hSemaphore, hasWaitValue, waitValue, numEventsInWaitList,
+      phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp(
+    ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore,
+    bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.bindlessImagesSignalExternalSemaphoreExp(
+      hSemaphore, hasSignalValue, signalValue, numEventsInWaitList,
+      phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
+    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueCooperativeKernelLaunchExp(
+      hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
+      numEventsInWaitList, phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
+    ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueTimestampRecordingExp(blocking, numEventsInWaitList,
+                                            phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
+    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+    const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
+    uint32_t numPropsInLaunchPropList,
+    const ur_exp_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueKernelLaunchCustomExp(
+      hKernel, workDim, pGlobalWorkSize, pLocalWorkSize,
+      numPropsInLaunchPropList, launchPropList, numEventsInWaitList,
+      phEventWaitList, phEvent);
+}
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp(
+    ur_queue_handle_t hQueue,
+    ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
+    uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList,
+    const ur_exp_enqueue_native_command_properties_t *pProperties,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  auto &Queue = *hQueue;
+  return Queue.enqueueNativeCommandExp(
+      pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties,
+      numEventsInWaitList, phEventWaitList, phEvent);
+}
diff --git a/source/adapters/level_zero/queue_api.hpp b/source/adapters/level_zero/queue_api.hpp
new file mode 100644
index 0000000000..3c76901176
--- /dev/null
+++ 
b/source/adapters/level_zero/queue_api.hpp
@@ -0,0 +1,153 @@
+/*
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+ * Exceptions. See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file queue_api.hpp
+ *
+ */
+
+#pragma once
+
+// Declares ur_result_t and the ur_*_handle_t / ur_*_t types used below.
+#include <ur_api.h>
+
+// Abstract queue interface behind the opaque ur_queue_handle_t.
+//
+// Every member except the destructor is pure virtual, so a concrete queue has
+// to implement the whole set. Each method corresponds to the ur* entry point
+// of the same name (queueRetain <-> urQueueRetain, enqueueUSMFill <->
+// urEnqueueUSMFill, ...) and takes that entry point's parameters with the
+// leading queue handle removed.
+struct ur_queue_handle_t_ {
+  // Declared here, defined out of line.
+  virtual ~ur_queue_handle_t_();
+
+  // Queue management.
+  virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *,
+                                   size_t *) = 0;
+  virtual ur_result_t queueRetain() = 0;
+  virtual ur_result_t queueRelease() = 0;
+  virtual ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *,
+                                           ur_native_handle_t *) = 0;
+  virtual ur_result_t queueFinish() = 0;
+  virtual ur_result_t queueFlush() = 0;
+
+  // Kernel launch and synchronisation.
+  virtual ur_result_t enqueueKernelLaunch(ur_kernel_handle_t, uint32_t,
+                                          const size_t *, const size_t *,
+                                          const size_t *, uint32_t,
+                                          const ur_event_handle_t *,
+                                          ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueEventsWait(uint32_t, const ur_event_handle_t *,
+                                        ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueEventsWaitWithBarrier(uint32_t,
+                                                   const ur_event_handle_t *,
+                                                   ur_event_handle_t *) = 0;
+
+  // Buffer operations.
+  virtual ur_result_t enqueueMemBufferRead(ur_mem_handle_t, bool, size_t,
+                                           size_t, void *, uint32_t,
+                                           const ur_event_handle_t *,
+                                           ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueMemBufferWrite(ur_mem_handle_t, bool, size_t,
+                                            size_t, const void *, uint32_t,
+                                            const ur_event_handle_t *,
+                                            ur_event_handle_t *) = 0;
+  virtual ur_result_t
+  enqueueMemBufferReadRect(ur_mem_handle_t, bool, ur_rect_offset_t,
+                           ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
+                           size_t, size_t, void *, uint32_t,
+                           const ur_event_handle_t *, ur_event_handle_t *) = 0;
+  virtual ur_result_t
+  enqueueMemBufferWriteRect(ur_mem_handle_t, bool, ur_rect_offset_t,
+                            ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
+                            size_t, size_t, void *, uint32_t,
+                            const ur_event_handle_t *, ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueMemBufferCopy(ur_mem_handle_t, ur_mem_handle_t,
+                                           size_t, size_t, size_t, uint32_t,
+                                           const ur_event_handle_t *,
+                                           ur_event_handle_t *) = 0;
+  virtual ur_result_t
+  enqueueMemBufferCopyRect(ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t,
+                           ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
+                           size_t, size_t, uint32_t, const ur_event_handle_t *,
+                           ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueMemBufferFill(ur_mem_handle_t, const void *,
+                                           size_t, size_t, size_t, uint32_t,
+                                           const ur_event_handle_t *,
+                                           ur_event_handle_t *) = 0;
+
+  // Image operations.
+  virtual ur_result_t enqueueMemImageRead(ur_mem_handle_t, bool,
+                                          ur_rect_offset_t, ur_rect_region_t,
+                                          size_t, size_t, void *, uint32_t,
+                                          const ur_event_handle_t *,
+                                          ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueMemImageWrite(ur_mem_handle_t, bool,
+                                           ur_rect_offset_t, ur_rect_region_t,
+                                           size_t, size_t, void *, uint32_t,
+                                           const ur_event_handle_t *,
+                                           ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueMemImageCopy(ur_mem_handle_t, ur_mem_handle_t,
+                                          ur_rect_offset_t, ur_rect_offset_t,
+                                          ur_rect_region_t, uint32_t,
+                                          const ur_event_handle_t *,
+                                          ur_event_handle_t *) = 0;
+
+  // Mapping. The trailing void ** of enqueueMemBufferMap is the ppRetMap
+  // argument of urEnqueueMemBufferMap.
+  virtual ur_result_t enqueueMemBufferMap(ur_mem_handle_t, bool, ur_map_flags_t,
+                                          size_t, size_t, uint32_t,
+                                          const ur_event_handle_t *,
+                                          ur_event_handle_t *, void **) = 0;
+  virtual ur_result_t enqueueMemUnmap(ur_mem_handle_t, void *, uint32_t,
+                                      const ur_event_handle_t *,
+                                      ur_event_handle_t *) = 0;
+
+  // USM operations. enqueueUSMAdvise has no wait list, only an output event.
+  virtual ur_result_t enqueueUSMFill(void *, size_t, const void *, size_t,
+                                     uint32_t, const ur_event_handle_t *,
+                                     ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueUSMMemcpy(bool, void *, const void *, size_t,
+                                       uint32_t, const ur_event_handle_t *,
+                                       ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueUSMPrefetch(const void *, size_t,
+                                         ur_usm_migration_flags_t, uint32_t,
+                                         const ur_event_handle_t *,
+                                         ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueUSMAdvise(const void *, size_t,
+                                       ur_usm_advice_flags_t,
+                                       ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *,
+                                       size_t, size_t, uint32_t,
+                                       const ur_event_handle_t *,
+                                       ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *,
+                                         size_t, size_t, size_t, uint32_t,
+                                         const ur_event_handle_t *,
+                                         ur_event_handle_t *) = 0;
+
+  // Device globals and host pipes.
+  virtual ur_result_t enqueueDeviceGlobalVariableWrite(
+      ur_program_handle_t, const char *, bool, size_t, size_t, const void *,
+      uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueDeviceGlobalVariableRead(
+      ur_program_handle_t, const char *, bool, size_t, size_t, void *, uint32_t,
+      const ur_event_handle_t *, ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueReadHostPipe(ur_program_handle_t, const char *,
+                                          bool, void *, size_t, uint32_t,
+                                          const ur_event_handle_t *,
+                                          ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueWriteHostPipe(ur_program_handle_t, const char *,
+                                           bool, void *, size_t, uint32_t,
+                                           const ur_event_handle_t *,
+                                           ur_event_handle_t *) = 0;
+
+  // Experimental (Exp) operations.
+  virtual ur_result_t bindlessImagesImageCopyExp(
+      void *, void *, const ur_image_format_t *, const ur_image_desc_t *,
+      ur_exp_image_copy_flags_t, ur_rect_offset_t, ur_rect_offset_t,
+      ur_rect_region_t, ur_rect_region_t, uint32_t, const ur_event_handle_t *,
+      ur_event_handle_t *) = 0;
+  virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp(
+      ur_exp_interop_semaphore_handle_t, bool, uint64_t, uint32_t,
+      const ur_event_handle_t *, ur_event_handle_t *) = 0;
+  virtual ur_result_t bindlessImagesSignalExternalSemaphoreExp(
+      ur_exp_interop_semaphore_handle_t, bool, uint64_t, uint32_t,
+      const ur_event_handle_t *, ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueCooperativeKernelLaunchExp(
+      ur_kernel_handle_t, uint32_t, const size_t *, const size_t *,
+      const size_t *, uint32_t, const ur_event_handle_t *,
+      ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t,
+                                                   const ur_event_handle_t *,
+                                                   ur_event_handle_t *) = 0;
+  virtual ur_result_t enqueueKernelLaunchCustomExp(
+      ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, uint32_t,
+      const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *,
+      ur_event_handle_t *) = 0;
+  virtual ur_result_t
+  enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *,
+                          uint32_t, const ur_mem_handle_t *,
+                          const ur_exp_enqueue_native_command_properties_t *,
+                          uint32_t, const ur_event_handle_t *,
+                          ur_event_handle_t *) = 0;
+};
diff --git a/source/adapters/level_zero/v2/queue_factory.hpp b/source/adapters/level_zero/v2/queue_factory.hpp
new file mode 100644
index 0000000000..0120df5f30
--- /dev/null
+++ b/source/adapters/level_zero/v2/queue_factory.hpp
@@ -0,0 +1,38 @@
+//===--------- queue_factory.hpp - Level Zero Adapter --------------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. 
See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <cstdlib>
+
+#include "../queue.hpp"
+
+#include "queue_immediate_in_order.hpp"
+
+namespace v2 {
+
+/// Decides whether the v2 queue implementation should serve a queue request.
+///
+/// The v2 queue is opt-in: the UR_L0_USE_QUEUE_V2 environment variable must
+/// parse to a non-zero integer. It is then used only when the device uses
+/// immediate command lists and neither UR_QUEUE_FLAG_SUBMISSION_BATCHED nor
+/// UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE is set in \p Flags.
+///
+/// \param Device device the queue is being created for.
+/// \param Flags  queue creation flags.
+/// \return true when createQueue() can build a v2 queue for this request.
+inline bool shouldUseQueueV2(ur_device_handle_t Device,
+                             ur_queue_flags_t Flags) {
+  const char *UrRet = std::getenv("UR_L0_USE_QUEUE_V2");
+
+  // std::strtol never throws: an unset, empty or non-numeric value counts as
+  // "disabled", where std::stoi would raise std::invalid_argument or
+  // std::out_of_range out of queue creation.
+  const bool Requested =
+      UrRet != nullptr && std::strtol(UrRet, nullptr, 10) != 0;
+
+  // only support immediate, in-order for now
+  return Requested && Device->useImmediateCommandLists() &&
+         (Flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED) == 0 &&
+         (Flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0;
+}
+
+/// Creates a v2 queue for \p Device in \p Context.
+///
+/// \return a heap-allocated ur_queue_immediate_in_order_t; the raw pointer is
+///         handed to the caller.
+/// \throws ur_result_t UR_RESULT_ERROR_INVALID_ARGUMENT when
+///         shouldUseQueueV2() rejects the request.
+inline ur_queue_handle_t createQueue(ur_context_handle_t Context,
+                                     ur_device_handle_t Device,
+                                     ur_queue_flags_t Flags) {
+  if (!shouldUseQueueV2(Device, Flags)) {
+    throw UR_RESULT_ERROR_INVALID_ARGUMENT;
+  }
+  return new ur_queue_immediate_in_order_t(Context, Device, Flags);
+}
+
+} // namespace v2
diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
new file mode 100644
index 0000000000..4428c34aa0
--- /dev/null
+++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
@@ -0,0 +1,537 @@
+//===--------- queue_immediate_in_order.cpp - Level Zero Adapter ---------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. 
See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "queue_immediate_in_order.hpp"
+
+namespace v2 {
+
+// The constructor ignores all of its arguments and the methods that follow are
+// placeholders: each one ignores its arguments and reports
+// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. Parameter names are kept as comments.
+ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
+    ur_context_handle_t, ur_device_handle_t, ur_queue_flags_t) {}
+
+ur_result_t ur_queue_immediate_in_order_t::queueGetInfo(
+    ur_queue_info_t /*propName*/, size_t /*propSize*/, void * /*pPropValue*/,
+    size_t * /*pPropSizeRet*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::queueRetain() {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::queueRelease() {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle(
+    ur_queue_native_desc_t * /*pDesc*/,
+    ur_native_handle_t * /*phNativeQueue*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::queueFinish() {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::queueFlush() {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch(
+    ur_kernel_handle_t /*hKernel*/, uint32_t /*workDim*/,
+    const size_t * /*pGlobalWorkOffset*/, const size_t * /*pGlobalWorkSize*/,
+    const size_t * /*pLocalWorkSize*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait(
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier(
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead(
+    ur_mem_handle_t /*hBuffer*/, bool /*blockingRead*/, size_t /*offset*/,
+    size_t /*size*/, void * /*pDst*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite(
+    ur_mem_handle_t /*hBuffer*/, bool /*blockingWrite*/, size_t /*offset*/,
+    size_t /*size*/, const void * /*pSrc*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect(
+    ur_mem_handle_t /*hBuffer*/, bool /*blockingRead*/,
+    ur_rect_offset_t /*bufferOrigin*/, ur_rect_offset_t /*hostOrigin*/,
+    ur_rect_region_t /*region*/, size_t /*bufferRowPitch*/,
+    size_t /*bufferSlicePitch*/, size_t /*hostRowPitch*/,
+    size_t /*hostSlicePitch*/, void * /*pDst*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect(
+    ur_mem_handle_t /*hBuffer*/, bool /*blockingWrite*/,
+    ur_rect_offset_t /*bufferOrigin*/, ur_rect_offset_t /*hostOrigin*/,
+    ur_rect_region_t /*region*/, size_t /*bufferRowPitch*/,
+    size_t /*bufferSlicePitch*/, size_t /*hostRowPitch*/,
+    size_t /*hostSlicePitch*/, void * /*pSrc*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy(
+    ur_mem_handle_t /*hBufferSrc*/, ur_mem_handle_t /*hBufferDst*/,
+    size_t /*srcOffset*/, size_t /*dstOffset*/, size_t /*size*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect(
+    ur_mem_handle_t /*hBufferSrc*/, ur_mem_handle_t /*hBufferDst*/,
+    ur_rect_offset_t /*srcOrigin*/, ur_rect_offset_t /*dstOrigin*/,
+    ur_rect_region_t /*region*/, size_t /*srcRowPitch*/,
+    size_t /*srcSlicePitch*/, size_t /*dstRowPitch*/, size_t /*dstSlicePitch*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill(
+    ur_mem_handle_t /*hBuffer*/, const void * /*pPattern*/,
+    size_t /*patternSize*/, size_t /*offset*/, size_t /*size*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageRead(
+    ur_mem_handle_t /*hImage*/, bool /*blockingRead*/,
+    ur_rect_offset_t /*origin*/, ur_rect_region_t /*region*/,
+    size_t /*rowPitch*/, size_t /*slicePitch*/, void * /*pDst*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageWrite(
+    ur_mem_handle_t /*hImage*/, bool /*blockingWrite*/,
+    ur_rect_offset_t /*origin*/, ur_rect_region_t /*region*/,
+    size_t /*rowPitch*/, size_t /*slicePitch*/, void * /*pSrc*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageCopy(
+    ur_mem_handle_t /*hImageSrc*/, ur_mem_handle_t /*hImageDst*/,
+    ur_rect_offset_t /*srcOrigin*/, ur_rect_offset_t /*dstOrigin*/,
+    ur_rect_region_t /*region*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap(
+    ur_mem_handle_t /*hBuffer*/, bool /*blockingMap*/,
+    ur_map_flags_t /*mapFlags*/, size_t /*offset*/, size_t /*size*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/, void ** /*ppRetMap*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap(
+    ur_mem_handle_t /*hMem*/, void * /*pMappedPtr*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill(
+    void * /*pMem*/, size_t /*patternSize*/, const void * /*pPattern*/,
+    size_t /*size*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy(
+    bool /*blocking*/, void * /*pDst*/, const void * /*pSrc*/, size_t /*size*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch(
+    const void * /*pMem*/, size_t /*size*/, ur_usm_migration_flags_t /*flags*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueUSMAdvise(
+    const void * /*pMem*/, size_t /*size*/, ur_usm_advice_flags_t /*advice*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill2D(
+    void * /*pMem*/, size_t /*pitch*/, size_t /*patternSize*/,
+    const void * /*pPattern*/, size_t /*width*/, size_t /*height*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D(
+    bool /*blocking*/, void * /*pDst*/, size_t /*dstPitch*/,
+    const void * /*pSrc*/, size_t /*srcPitch*/, size_t /*width*/,
+    size_t /*height*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableWrite(
+    ur_program_handle_t /*hProgram*/, const char * /*name*/,
+    bool /*blockingWrite*/, size_t /*count*/, size_t /*offset*/,
+    const void * /*pSrc*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableRead(
+    ur_program_handle_t /*hProgram*/, const char * /*name*/,
+    bool /*blockingRead*/, size_t /*count*/, size_t /*offset*/,
+    void * /*pDst*/, uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueReadHostPipe(
+    ur_program_handle_t /*hProgram*/, const char * /*pipe_symbol*/,
+    bool /*blocking*/, void * /*pDst*/, size_t /*size*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::enqueueWriteHostPipe(
+    ur_program_handle_t /*hProgram*/, const char * /*pipe_symbol*/,
+    bool /*blocking*/, void * /*pSrc*/, size_t /*size*/,
+    uint32_t /*numEventsInWaitList*/,
+    const ur_event_handle_t * /*phEventWaitList*/,
+    ur_event_handle_t * /*phEvent*/) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp(
+    void *pDst, void *pSrc, const ur_image_format_t *pImageFormat,
+    const ur_image_desc_t *pImageDesc, ur_exp_image_copy_flags_t imageCopyFlags,
+    ur_rect_offset_t srcOffset, ur_rect_offset_t dstOffset,
+    ur_rect_region_t copyExtent, ur_rect_region_t hostExtent,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  std::ignore = pDst;
+  std::ignore = pSrc;
+  std::ignore = pImageFormat;
+  std::ignore = pImageDesc;
+  std::ignore = imageCopyFlags;
+  std::ignore = srcOffset;
+  std::ignore = dstOffset;
+  std::ignore = copyExtent;
+  std::ignore = 
hostExtent; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t +ur_queue_immediate_in_order_t::bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, + uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hSemaphore; + std::ignore = hasWaitValue; + std::ignore = waitValue; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t +ur_queue_immediate_in_order_t::bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, + uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hSemaphore; + std::ignore = hasSignalValue; + std::ignore = signalValue; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkOffset; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( + bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t 
*phEvent) { + std::ignore = blocking; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueNativeCommandExp( + ur_exp_enqueue_native_command_function_t, void *, uint32_t, + const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +} // namespace v2 diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp new file mode 100644 index 0000000000..2e3553028f --- /dev/null +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -0,0 +1,207 @@ +//===--------- queue_immediate_in_order.hpp - Level Zero Adapter ---------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "../common.hpp" +#include "../queue.hpp" + +#include "ur/ur.hpp" +/* v2::ur_queue_immediate_in_order_t derives from _ur_object and ur_queue_handle_t_; apart from its constructor, every member function declared in it is marked override. The constructor takes a context handle, a device handle and queue flags (parameter names are omitted). NOTE(review): judging by the name this is the in-order queue variant built on an immediate command list - confirm against the implementation. */ +namespace v2 { +struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { + ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t, + ur_queue_flags_t); +/* Queue-level entry points (queueXxx, enqueueXxx, bindlessImagesXxx). In the accompanying queue_immediate_in_order.cpp hunk the definitions from enqueueUSMFill through enqueueNativeCommandExp all return UR_RESULT_ERROR_UNSUPPORTED_FEATURE. The declarations of enqueueUSMFill2D, enqueueUSMMemcpy2D and enqueueNativeCommandExp omit parameter names. */ + ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) override; + ur_result_t queueRetain() override; + ur_result_t queueRelease() override; + ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) override; + ur_result_t queueFinish() override; + ur_result_t queueFlush() override; + ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, + const void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, bool
blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, + size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, + size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite, +
ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + void **ppRetMap) override; + ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill(void *pMem, size_t patternSize, + const void *pPattern, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t, + size_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t, + size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size, + ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMAdvise(const void *pMem, size_t size, +
ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableWrite( + ur_program_handle_t hProgram, const char *name, bool blockingWrite, + size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableRead( + ur_program_handle_t hProgram, const char *name, bool blockingRead, + size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueWriteHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, + uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, + uint64_t signalValue, uint32_t numEventsInWaitList, + const
ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, + uint32_t, const ur_mem_handle_t *, + const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; +}; + +} // namespace v2