"""
Entry-point:
    renders the level_zero queue API sources (queue_api.cpp and
    queue_api.hpp) from their mako templates
"""
def generate_level_zero_queue_api(path, section, namespace, tags, version, specs, meta):
    # NOTE: `section` is accepted but not read anywhere in this function.
    name = "queue_api"

    # Both generated files are written into <path>/adapters/level_zero.
    layer_dstpath = os.path.join(path, "adapters/level_zero")
    os.makedirs(layer_dstpath, exist_ok=True)

    loc = 0
    # Source file first, then header; each template is "<output name>.mako"
    # inside the "templates" directory.
    for filename in ("queue_api.cpp", "queue_api.hpp"):
        fin = os.path.join("templates", filename + ".mako")
        fout = os.path.join(layer_dstpath, filename)

        print("Generating %s..." % fout)

        # The value returned by util.makoWrite is accumulated and reported
        # below as the number of generated lines.
        loc += util.makoWrite(
            fin, fout,
            ver=version,
            name=name,
            namespace=namespace,
            tags=tags,
            specs=specs,
            meta=meta)

    print("QUEUE Generated %s lines of code.\n" % loc)
def get_queue_related_functions(specs, namespace, tags):
    """Return every function object whose first parameter is a queue handle.

    Walks ``obj`` in ``s['objects']`` for each ``s`` in ``specs`` and keeps
    the objects whose ``type`` matches ``function`` and whose first entry in
    ``params`` has the type ``$x_queue_handle_t``. Functions with an empty
    or missing ``params`` list are skipped.

    ``namespace`` and ``tags`` are accepted but not read by this function.
    """
    return [
        obj
        for s in specs
        for obj in s['objects']
        if re.match(r"function", obj['type'])
        and obj.get('params')
        and obj['params'][0]['type'] == '$x_queue_handle_t'
    ]


def transform_queue_related_function_name(namespace, tags, obj, format=None):
    """Build the member-style signature for a queue related function.

    The result is ``"<name>(<params>)"`` where:
      - ``<name>`` is ``make_func_name(namespace, tags, obj)`` with a leading
        ``namespace`` prefix removed and its first letter lowercased;
      - ``<params>`` are the lines from ``make_param_lines`` with the first
        one (the queue handle) dropped, joined by ``", "``.

    ``format`` is forwarded to ``make_param_lines``; when omitted it defaults
    to ``["name", "type"]``.

    Raises:
        ValueError: if ``obj`` has no parameters or its first parameter is
            not of type ``$x_queue_handle_t``.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if format is None:
        format = ["name", "type"]

    # Validate before doing any work so malformed input always surfaces as
    # ValueError rather than as an IndexError/KeyError further down.
    params = obj.get('params') or []
    if not params or params[0]['type'] != '$x_queue_handle_t':
        raise ValueError('First parameter is not a queue handle')

    function_name = make_func_name(namespace, tags, obj)
    # Strip the namespace only when it is the leading prefix; a plain
    # str.replace() would also delete the same letters inside the name.
    if namespace and function_name.startswith(namespace):
        function_name = function_name[len(namespace):]
    # Slicing (instead of indexing) keeps an empty name from raising.
    function_name = function_name[:1].lower() + function_name[1:]

    # Drop the first line: it describes the queue handle, which becomes the
    # object the generated member function is called on.
    param_lines = make_param_lines(namespace, tags, obj, format=format)[1:]

    return "{}({})".format(function_name, ", ".join(param_lines))
See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ${name}.cpp + * + */ + +#include "queue_api.hpp" + +ur_queue_handle_t_::~ur_queue_handle_t_() {} + +## FUNCTION ################################################################### +%for obj in th.get_queue_related_functions(specs, n, tags): +${X}_APIEXPORT ${x}_result_t ${X}_APICALL +${th.make_func_name(n, tags, obj)}( + %for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]): + ${line} + %endfor + ) +{ + return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; +} +%endfor diff --git a/scripts/templates/queue_api.hpp.mako b/scripts/templates/queue_api.hpp.mako new file mode 100644 index 0000000000..dcc86265f7 --- /dev/null +++ b/scripts/templates/queue_api.hpp.mako @@ -0,0 +1,31 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() +%>/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ${name}.hpp + * + */ + +#pragma once + +#include + +struct ur_queue_handle_t_ { + virtual ~ur_queue_handle_t_(); + %for obj in th.get_queue_related_functions(specs, n, tags): + virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0; + %endfor +}; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index c652b9ece4..171aaa3924 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -110,8 +110,11 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_factory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp @@ -126,10 +129,12 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp ) if(NOT WIN32) diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp index b708333de7..b67cccc4f1 100644 --- a/source/adapters/level_zero/enqueue_native.cpp +++ b/source/adapters/level_zero/enqueue_native.cpp @@ -10,10 +10,11 @@ #include -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( - ur_queue_handle_t, 
ur_exp_enqueue_native_command_function_t, void *, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) { +#include "queue.hpp" + +ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp( + ur_exp_enqueue_native_command_function_t, void *, uint32_t, + const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 5881610f68..77c47d51aa 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -59,8 +59,8 @@ bool WaitListEmptyOrAllEventsFromSameQueue( return true; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of + ///< the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -72,7 +72,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; if (EventWaitList) { bool UseCopyEngine = false; @@ -152,8 +152,9 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? 
std::atoi(UrRet) : true); }(); -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t +ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the + ///< queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -165,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -661,8 +662,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( bool Blocking, ///< [in] blocking or non-blocking enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -676,7 +676,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( *OutEvent ///< [in,out] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1022,7 +1022,6 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { // Save pointer to the queue before deleting/resetting event. auto Queue = Legacy(Event->UrQueue); - auto URQueue = Event->UrQueue; // If the event was a timestamp recording, we try to evict its entry in the // queue. 
@@ -1054,8 +1053,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { // created so that we can avoid ur_queue_handle_t is released before the // associated ur_event_handle_t is released. Here we have to decrement it so // ur_queue_handle_t can be released successfully. - if (URQueue) { - UR_CALL(urQueueReleaseInternal(URQueue)); + if (Queue) { + UR_CALL(urQueueReleaseInternal(Queue)); } return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index 4199cb1106..2b8292538e 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -748,14 +748,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( - ur_queue_handle_t hUrQueue, void *pDst, void *pSrc, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, - ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, - ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto hQueue = Legacy(hUrQueue); +ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_exp_image_copy_flags_t imageCopyFlags, + ur_rect_offset_t srcOffset, ur_rect_offset_t dstOffset, + ur_rect_region_t copyExtent, ur_rect_region_t hostExtent, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + auto hQueue = this; std::scoped_lock Lock(hQueue->Mutex); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1108,11 +1108,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t 
UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, - bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList, +ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasValue, + uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = waitValue; @@ -1124,11 +1123,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, - bool hasValue, uint64_t signalValue, uint32_t numEventsInWaitList, +ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasValue, + uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = signalValue; diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index fa1b2f9192..a33e320cc4 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -101,8 +101,7 @@ ur_result_t getSuggestedLocalWorkSize(ur_queue_handle_legacy_t hQueue, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group 
work-items @@ -131,7 +130,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ze_kernel_handle_t ZeKernel{}; UR_CALL(getZeKernel(Queue, Kernel, &ZeKernel)); @@ -309,8 +308,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group work-items @@ -339,7 +337,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; auto ZeDevice = Queue->Device->ZeDevice; ze_kernel_handle_t ZeKernel{}; @@ -571,8 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue to submit to. +ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( ur_program_handle_t Program, ///< [in] handle of the program containing the ///< device global variable. const char @@ -593,7 +590,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Find global variable pointer @@ -621,29 +618,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue to submit to. - ur_program_handle_t Program, ///< [in] handle of the program containing the - ///< device global variable. - const char - *Name, ///< [in] the unique identifier for the device global variable. +ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( + ur_program_handle_t Program, ///< [in] handle of the program containing + ///< the device global variable. + const char *Name, ///< [in] the unique identifier for the device global + ///< variable. bool BlockingRead, ///< [in] indicates if this operation should block. size_t Count, ///< [in] the number of bytes to copy. - size_t Offset, ///< [in] the byte offset into the device global variable to - ///< start copying. - void *Dst, ///< [in] pointer to where the data must be copied to. + size_t Offset, ///< [in] the byte offset into the device global variable + ///< to start copying. + void *Dst, ///< [in] pointer to where the data must be copied to. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list. const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. + ///< pointer to a list of events that must be + ///< complete before the kernel execution. If + ///< nullptr, the numEventsInWaitList must be 0, + ///< indicating that no wait event. 
ur_event_handle_t - *Event ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. + *Event ///< [in,out][optional] return an event object that + ///< identifies this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock lock(Queue->Mutex); @@ -1206,3 +1202,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 24b9c53afb..95650a7b94 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -461,9 +461,8 @@ static ur_result_t enqueueMemImageCommandHelper( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes 
in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -480,7 +479,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Src = ur_cast(hBuffer); std::shared_lock SrcLock(Src->Mutex, std::defer_lock); @@ -496,9 +495,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( true /* PreferCopyEngine */); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -517,7 +515,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -534,9 +532,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( true /* PreferCopyEngine */); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region @@ -563,7 +560,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock); @@ -580,9 +577,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer @@ -611,7 +607,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -628,8 +624,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( ur_mem_handle_t BufferSrc, ///< [in] handle of the src buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object size_t SrcOffset, ///< [in] offset into hBufferSrc to begin copying from @@ -647,7 +642,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -680,8 +675,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t +ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the + ///< queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the source buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object ur_rect_offset_t SrcOrigin, ///< [in] 3D offset in the source buffer @@ -708,7 +704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -739,12 +735,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Buffer, ///< [in] handle of the buffer object - const void *Pattern, ///< [in] pointer to the fill pattern - size_t PatternSize, ///< [in] size in bytes of the pattern - size_t Offset, ///< [in] offset into the buffer +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( + ur_mem_handle_t Buffer, ///< [in] handle of the buffer object + const void *Pattern, ///< [in] pointer to the fill pattern + size_t PatternSize, ///< [in] size in bytes of the pattern + size_t Offset, ///< [in] offset into the buffer size_t Size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -758,7 +753,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); @@ -773,9 +768,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( Size, NumEventsInWaitList, EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in ///< the 1D, 2D, or 3D image @@ -796,7 +790,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -805,9 +799,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in @@ -829,7 +822,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -838,8 +831,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t +ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of + ///< the queue object ur_mem_handle_t ImageSrc, ///< [in] handle of the src image object ur_mem_handle_t ImageDst, ///< [in] handle of the dest image object ur_rect_offset_t SrcOrigin, ///< [in] defines the (x,y,z) offset in pixels @@ -860,7 +854,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); std::scoped_lock, ur_shared_mutex, ur_shared_mutex> @@ -878,9 +872,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - ur_mem_handle_t Buf, ///< [in] handle of the buffer object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( + ur_mem_handle_t Buf, ///< [in] handle of the buffer object bool BlockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t MapFlags, ///< [in] flags for read, write, readwrite mapping size_t Offset, ///< [in] offset in bytes of the buffer region being mapped @@ -899,7 +892,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( void **RetMap ///< [in,out] return mapped pointer. TODO: move it before ///< numEventsInWaitList? 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; auto Buffer = ur_cast<_ur_buffer *>(Buf); UR_ASSERT(!Buffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -961,7 +954,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(UrQueue)); + UR_CALL(urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. std::scoped_lock Guard(Buffer->Mutex); @@ -1047,8 +1040,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( ur_mem_handle_t Mem, ///< [in] handle of the memory (buffer or image) object void *MappedPtr, ///< [in] mapped host address uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -1063,7 +1055,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; UR_ASSERT(!Mem->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); auto Buffer = ur_cast<_ur_buffer *>(Mem); @@ -1118,7 +1110,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(UrQueue)); + UR_CALL(urQueueFinish(Queue)); char *ZeHandleDst; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -1175,38 +1167,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemset( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - void *Ptr, ///< [in] pointer to USM memory object - int8_t ByteValue, ///< [in] byte value to fill - size_t Count, ///< [in] size in bytes to be set - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. - ur_event_handle_t *Event ///< [in,out][optional] return an event object that - ///< identifies this particular command instance. 
-) { - auto Queue = Legacy(UrQueue); - std::ignore = Queue; - std::ignore = Ptr; - std::ignore = ByteValue; - std::ignore = Count; - std::ignore = NumEventsInWaitList; - std::ignore = EventWaitList; - std::ignore = Event; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - bool Blocking, ///< [in] blocking or non-blocking copy +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( + bool Blocking, ///< [in] blocking or non-blocking copy void *Dst, ///< [in] pointer to the destination USM memory object const void *Src, ///< [in] pointer to the source USM memory object size_t Size, ///< [in] size in bytes to be copied @@ -1222,7 +1184,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Device to Device copies are found to execute slower on copy engine @@ -1238,8 +1200,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t Flags, ///< [in] USM prefetch flags @@ -1255,7 +1216,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::ignore = Flags; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1307,8 +1268,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t Advice, ///< [in] USM memory advice @@ -1316,7 +1276,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1366,9 +1326,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. - void *Mem, ///< [in] pointer to memory to be filled. +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( + void *Mem, ///< [in] pointer to memory to be filled. size_t Pitch, ///< [in] the total width of the destination memory including ///< padding. size_t PatternSize, ///< [in] the size in bytes of the pattern. @@ -1386,7 +1345,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. 
) { - std::ignore = Queue; std::ignore = Mem; std::ignore = Pitch; std::ignore = PatternSize; @@ -1401,41 +1359,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemset2D( - ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. - void *Mem, ///< [in] pointer to memory to be filled. - size_t Pitch, ///< [in] the total width of the destination memory including - ///< padding. - int Value, ///< [in] the value to fill into the region in pMem. - size_t Width, ///< [in] the width in bytes of each row to set. - size_t Height, ///< [in] the height of the columns to set. - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. - ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. -) { - std::ignore = Queue; - std::ignore = Mem; - std::ignore = Pitch; - std::ignore = Value; - std::ignore = Width; - std::ignore = Height; - std::ignore = NumEventsInWaitList; - std::ignore = EventWaitList; - std::ignore = OutEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue to submit to. +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( bool Blocking, ///< [in] indicates if this operation should block the host. void *Dst, ///< [in] pointer to memory where data will be copied. 
size_t DstPitch, ///< [in] the total width of the source memory including @@ -1456,7 +1380,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; ur_rect_offset_t ZeroOffset{0, 0, 0}; ur_rect_region_t Region{Width, Height, 0}; @@ -2353,9 +2277,8 @@ size_t _ur_buffer::getAlignment() const { return Alignment; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object - void *Ptr, ///< [in] pointer to USM memory object +ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill( + void *Ptr, ///< [in] pointer to USM memory object size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a ///< power of 2 and less than or equal to width. const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. @@ -2371,7 +2294,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_event_handle_t *Event ///< [out][optional] return an event object that ///< identifies this particular command instance. 
) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::scoped_lock Lock(Queue->Mutex); return enqueueMemFillHelper( @@ -2383,13 +2306,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } /// Host Pipes -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t UrQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - auto hQueue = Legacy(UrQueue); - std::ignore = hQueue; +ur_result_t ur_queue_handle_legacy_t_::enqueueReadHostPipe( + ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, + void *pDst, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { std::ignore = hProgram; std::ignore = pipe_symbol; std::ignore = blocking; @@ -2403,13 +2323,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t urEnqueueWriteHostPipe( - ur_queue_handle_t UrQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - auto hQueue = Legacy(UrQueue); - std::ignore = hQueue; +ur_result_t ur_queue_handle_legacy_t_::enqueueWriteHostPipe( + ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { std::ignore = hProgram; std::ignore = pipe_symbol; std::ignore = blocking; diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 769a321766..00e8c0f718 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -24,6 +24,8 @@ #include "ur_util.hpp" #include "ze_api.h" +#include 
"v2/queue_factory.hpp" + // Hard limit for the event completion batches. static const uint64_t CompletionBatchesMax = [] { // Default value chosen empirically to maximize the number of asynchronous @@ -342,8 +344,7 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue object +ur_result_t ur_queue_handle_legacy_t_::queueGetInfo( ur_queue_info_t ParamName, ///< [in] name of the queue property to query size_t ParamValueSize, ///< [in] size in bytes of the queue property value ///< provided @@ -351,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( size_t *ParamValueSizeRet ///< [out] size in bytes returned in queue ///< property value ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; std::shared_lock Lock(Queue->Mutex); UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); @@ -504,6 +505,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( UR_ASSERT(Context->isValidDevice(Device), UR_RESULT_ERROR_INVALID_DEVICE); + // optimized path for immediate, in-order command lists + if (v2::shouldUseQueueV2(Device, Flags)) { + *Queue = v2::createQueue(Context, Device, Flags); + return UR_RESULT_SUCCESS; + } + // Create placeholder queues in the compute queue group. // Actual L0 queues will be created at first use. std::vector ZeComputeCommandQueues( @@ -529,9 +536,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( nullptr); try { - *Queue = new ur_queue_handle_t_( - std::in_place_type, ZeComputeCommandQueues, - ZeCopyCommandQueues, Context, Device, true, Flags, ForceComputeIndex); + *Queue = new ur_queue_handle_legacy_t_(ZeComputeCommandQueues, + ZeCopyCommandQueues, Context, Device, + true, Flags, ForceComputeIndex); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) 
{ @@ -581,10 +588,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( - ur_queue_handle_t UrQueue ///< [in] handle of the queue object to get access -) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueRetain() { + auto Queue = this; + { std::scoped_lock Lock(Queue->Mutex); Queue->RefCountExternal++; @@ -593,10 +599,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( - ur_queue_handle_t UrQueue ///< [in] handle of the queue object to release -) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueRelease() { + auto Queue = this; std::vector EventListToCleanup; { @@ -687,17 +691,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( // (it was incremented when they were added to the command list). UR_CALL(urEventReleaseInternal(reinterpret_cast(Event))); } - UR_CALL(urQueueReleaseInternal(UrQueue)); + UR_CALL(urQueueReleaseInternal(Queue)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( - ur_queue_handle_t UrQueue, ///< [in] handle of the queue. +ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( ur_queue_native_desc_t *Desc, ur_native_handle_t *NativeQueue ///< [out] a pointer to the native handle of the queue. ) { - auto Queue = Legacy(UrQueue); + auto Queue = this; + // Lock automatically releases when this goes out of scope. 
std::shared_lock lock(Queue->Mutex); @@ -800,9 +804,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector CopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( - std::in_place_type, ComputeQueues, - CopyQueues, Context, UrDevice, OwnNativeHandle, Flags); + ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -824,9 +827,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector ZeroCopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( - std::in_place_type, ZeQueues, - ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); + ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -839,10 +841,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( - ur_queue_handle_t UrQueue ///< [in] handle of the queue to be finished. -) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueFinish() { + auto Queue = this; if (Queue->UsingImmCmdLists) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock(Queue->Mutex); @@ -907,10 +907,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush( - ur_queue_handle_t UrQueue ///< [in] handle of the queue to be flushed. 
-) { - auto Queue = Legacy(UrQueue); +ur_result_t ur_queue_handle_legacy_t_::queueFlush() { + auto Queue = this; std::scoped_lock Lock(Queue->Mutex); return Queue->executeAllOpenCommandLists(); } @@ -1576,9 +1574,7 @@ void ur_queue_handle_legacy_t_::clearEndTimeRecordings() { EndTimeRecordings.clear(); } -ur_result_t urQueueReleaseInternal(ur_queue_handle_t UrQueue) { - ur_queue_handle_legacy_t Queue = Legacy(UrQueue); - +ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) { if (!Queue->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1612,7 +1608,7 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t UrQueue) { Queue->CopyCommandBatch.NumTimesClosedFull, Queue->CopyCommandBatch.NumTimesClosedEarly); - delete UrQueue; + delete Queue; return UR_RESULT_SUCCESS; } @@ -1885,7 +1881,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, HostVisible.value(), Event, Queue->CounterBasedEventsEnabled)); - (*Event)->UrQueue = Queue->UnifiedHandle; + (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; (*Event)->IsDiscarded = IsInternal; (*Event)->IsMultiDevice = IsMultiDevice; diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index afd0d8975e..3759353783 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -26,20 +26,15 @@ #include "common.hpp" #include "device.hpp" +#include "queue_api.hpp" struct ur_queue_handle_legacy_t_; using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; extern "C" { -ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue); +ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue); } // extern "C" -namespace v2 { -struct ur_queue_dispatcher_t { - // TODO -}; -} // namespace v2 - struct ur_completion_batch; using ur_completion_batch_list = std::list; using ur_completion_batch_it = ur_completion_batch_list::iterator; @@ -233,7 +228,7 @@ using ur_command_list_map_t = // The iterator pointing 
to a specific command-list in use. using ur_command_list_ptr_t = ur_command_list_map_t::iterator; -struct ur_queue_handle_legacy_t_ : _ur_object { +struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { ur_queue_handle_legacy_t_( std::vector &ComputeQueues, std::vector &CopyQueues, @@ -241,6 +236,190 @@ struct ur_queue_handle_legacy_t_ : _ur_object { bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0, int ForceComputeIndex = -1); + ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) override; + ur_result_t queueRetain() override; + ur_result_t queueRelease() override; + ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) override; + ur_result_t queueFinish() override; + ur_result_t queueFlush() override; + ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, + const void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, 
bool blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, + size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, + size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite, + 
ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + void **ppRetMap) override; + ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill(void *pMem, size_t patternSize, + const void *pPattern, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t, + size_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t, + size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size, + ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMAdvise(const void *pMem, size_t size, + 
ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableWrite( + ur_program_handle_t hProgram, const char *name, bool blockingWrite, + size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableRead( + ur_program_handle_t hProgram, const char *name, bool blockingRead, + size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueWriteHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, + uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, + uint64_t signalValue, uint32_t numEventsInWaitList, + const 
ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, + uint32_t, const ur_mem_handle_t *, + const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; + using queue_type = ur_device_handle_t_::queue_group_info_t::type; // PI queue is in general a one to many mapping to L0 native queues. struct ur_queue_group_t { @@ -699,28 +878,12 @@ struct ur_queue_handle_legacy_t_ : _ur_object { // Threshold for cleaning up the EventList for immediate command lists. size_t getImmdCmmdListsEventCleanupThreshold(); - - // Pointer to the unified handle. - ur_queue_handle_t_ *UnifiedHandle; -}; - -// Unified handle that represents either legacy Queue or new dispatcher. -struct ur_queue_handle_t_ { - template - ur_queue_handle_t_(std::in_place_type_t tag, Args &&...args) - : Queue(tag, std::forward(args)...) 
{ - if constexpr (std::is_same_v) { - std::get(Queue).UnifiedHandle = this; - } - } - - std::variant Queue; }; -template QueueT *GetQueue(ur_queue_handle_t Queue) { +template QueueT GetQueue(ur_queue_handle_t Queue) { if (!Queue) return nullptr; - auto *Q = std::get_if(&Queue->Queue); + auto *Q = dynamic_cast(Queue); if (!Q) { throw UR_RESULT_ERROR_INVALID_QUEUE; } @@ -728,7 +891,7 @@ template QueueT *GetQueue(ur_queue_handle_t Queue) { } static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { - return GetQueue(Queue); + return GetQueue(Queue); } // This helper function creates a ur_event_handle_t and associate a diff --git a/source/adapters/level_zero/queue_api.cpp b/source/adapters/level_zero/queue_api.cpp new file mode 100644 index 0000000000..622000a07f --- /dev/null +++ b/source/adapters/level_zero/queue_api.cpp @@ -0,0 +1,322 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. 
See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file queue_api.cpp + * + */ + +#include "queue_api.hpp" + +ur_queue_handle_t_::~ur_queue_handle_t_() {} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, + ur_queue_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); +} +UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { + return hQueue->queueRetain(); +} +UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { + return hQueue->queueRelease(); +} +UR_APIEXPORT ur_result_t UR_APICALL +urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) { + return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); +} +UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { + return hQueue->queueFinish(); +} +UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { + return hQueue->queueFlush(); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueKernelLaunch( + hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, 
uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, + numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferWrite(hBuffer, blockingWrite, offset, size, + pSrc, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferReadRect( + hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t 
hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferWriteRect( + hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, + dstOffset, size, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferCopyRect( + hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch, + srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, + size_t patternSize, size_t offset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, + size, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( + ur_queue_handle_t hQueue, 
ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueMemImageRead( + hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, + numEventsInWaitList, phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueMemImageWrite( + hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, + numEventsInWaitList, phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( + ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, + region, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { + return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, + size, numEventsInWaitList, phEventWaitList, + phEvent, ppRetMap); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( + ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, 
const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( + ur_queue_handle_t hQueue, void *pMem, size_t patternSize, + const void *pPattern, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( + ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, + numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( + ur_queue_handle_t hQueue, const void *pMem, size_t size, + ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, + ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( + ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, + const void *pPattern, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, + height, numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL 
urEnqueueUSMMemcpy2D( + ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, + const void *pSrc, size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, + width, height, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueDeviceGlobalVariableWrite( + hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueDeviceGlobalVariableRead( + hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, void *pDst, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, + size, numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, void *pSrc, size_t size, + uint32_t 
numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, + size, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, void *pDst, void *pSrc, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->bindlessImagesImageCopyExp( + pDst, pSrc, pImageFormat, pImageDesc, imageCopyFlags, srcOffset, + dstOffset, copyExtent, hostExtent, numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->bindlessImagesWaitExternalSemaphoreExp( + hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, + phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->bindlessImagesSignalExternalSemaphoreExp( + hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t 
*pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueCooperativeKernelLaunchExp( + hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( + ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueKernelLaunchCustomExp( + hKernel, workDim, pGlobalWorkSize, pLocalWorkSize, + numPropsInLaunchPropList, launchPropList, numEventsInWaitList, + phEventWaitList, phEvent); +} +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueNativeCommandExp( + pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, + numEventsInWaitList, phEventWaitList, phEvent); +} diff --git a/source/adapters/level_zero/queue_api.hpp b/source/adapters/level_zero/queue_api.hpp new file mode 100644 index 0000000000..3c76901176 --- /dev/null +++ 
b/source/adapters/level_zero/queue_api.hpp @@ -0,0 +1,153 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM + * Exceptions. See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file queue_api.hpp + * + */ + +#pragma once + +#include + +struct ur_queue_handle_t_ { + virtual ~ur_queue_handle_t_(); + virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *, + size_t *) = 0; + virtual ur_result_t queueRetain() = 0; + virtual ur_result_t queueRelease() = 0; + virtual ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *, + ur_native_handle_t *) = 0; + virtual ur_result_t queueFinish() = 0; + virtual ur_result_t queueFlush() = 0; + virtual ur_result_t enqueueKernelLaunch(ur_kernel_handle_t, uint32_t, + const size_t *, const size_t *, + const size_t *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueEventsWait(uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueEventsWaitWithBarrier(uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemBufferRead(ur_mem_handle_t, bool, size_t, + size_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemBufferWrite(ur_mem_handle_t, bool, size_t, + size_t, const void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueMemBufferReadRect(ur_mem_handle_t, bool, ur_rect_offset_t, + ur_rect_offset_t, ur_rect_region_t, size_t, size_t, + size_t, size_t, void *, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueMemBufferWriteRect(ur_mem_handle_t, bool, ur_rect_offset_t, + ur_rect_offset_t, ur_rect_region_t, size_t, size_t, + size_t, size_t, void *, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t 
enqueueMemBufferCopy(ur_mem_handle_t, ur_mem_handle_t, + size_t, size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueMemBufferCopyRect(ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, + ur_rect_offset_t, ur_rect_region_t, size_t, size_t, + size_t, size_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemBufferFill(ur_mem_handle_t, const void *, + size_t, size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemImageRead(ur_mem_handle_t, bool, + ur_rect_offset_t, ur_rect_region_t, + size_t, size_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemImageWrite(ur_mem_handle_t, bool, + ur_rect_offset_t, ur_rect_region_t, + size_t, size_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemImageCopy(ur_mem_handle_t, ur_mem_handle_t, + ur_rect_offset_t, ur_rect_offset_t, + ur_rect_region_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueMemBufferMap(ur_mem_handle_t, bool, ur_map_flags_t, + size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *, void **) = 0; + virtual ur_result_t enqueueMemUnmap(ur_mem_handle_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMFill(void *, size_t, const void *, size_t, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMMemcpy(bool, void *, const void *, size_t, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMPrefetch(const void *, size_t, + ur_usm_migration_flags_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMAdvise(const void *, size_t, + ur_usm_advice_flags_t, + ur_event_handle_t 
*) = 0; + virtual ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, + size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, + size_t, size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueDeviceGlobalVariableWrite( + ur_program_handle_t, const char *, bool, size_t, size_t, const void *, + uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t enqueueDeviceGlobalVariableRead( + ur_program_handle_t, const char *, bool, size_t, size_t, void *, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t enqueueReadHostPipe(ur_program_handle_t, const char *, + bool, void *, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueWriteHostPipe(ur_program_handle_t, const char *, + bool, void *, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t bindlessImagesImageCopyExp( + void *, void *, const ur_image_format_t *, const ur_image_desc_t *, + ur_exp_image_copy_flags_t, ur_rect_offset_t, ur_rect_offset_t, + ur_rect_region_t, ur_rect_region_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t, bool, uint64_t, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t, bool, uint64_t, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, + const size_t *, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t, + const 
ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueKernelLaunchCustomExp( + ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, uint32_t, + const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, + uint32_t, const ur_mem_handle_t *, + const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; +}; diff --git a/source/adapters/level_zero/v2/queue_factory.hpp b/source/adapters/level_zero/v2/queue_factory.hpp new file mode 100644 index 0000000000..0120df5f30 --- /dev/null +++ b/source/adapters/level_zero/v2/queue_factory.hpp @@ -0,0 +1,38 @@ +//===--------- queue_factory.cpp - Level Zero Adapter --------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "../queue.hpp" + +#include "queue_immediate_in_order.hpp" + +namespace v2 { + +inline bool shouldUseQueueV2(ur_device_handle_t Device, + ur_queue_flags_t Flags) { + const char *UrRet = std::getenv("UR_L0_USE_QUEUE_V2"); + + // only support immediate, in-order for now + return UrRet && std::stoi(UrRet) && Device->useImmediateCommandLists() && + (Flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED) == 0 && + (Flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0; +} + +inline ur_queue_handle_t createQueue(ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_flags_t Flags) { + if (!shouldUseQueueV2(Device, Flags)) { + throw UR_RESULT_ERROR_INVALID_ARGUMENT; + } + return new ur_queue_immediate_in_order_t(Context, Device, Flags); +} + +} // namespace v2 diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp new file mode 100644 index 0000000000..4428c34aa0 --- /dev/null +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -0,0 +1,537 @@ +//===--------- queue_immediate_in_order.cpp - Level Zero Adapter ---------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "queue_immediate_in_order.hpp" + +namespace v2 { +ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( + ur_context_handle_t, ur_device_handle_t, ur_queue_flags_t) {} + +ur_result_t +ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + std::ignore = propName; + std::ignore = propSize; + std::ignore = pPropValue; + std::ignore = pPropSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::queueRetain() { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::queueRelease() { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle( + ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) { + std::ignore = pDesc; + std::ignore = phNativeQueue; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::queueFinish() { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::queueFlush() { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkOffset; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t 
ur_queue_immediate_in_order_t::enqueueEventsWait( + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( + ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, + void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBuffer; + std::ignore = blockingRead; + std::ignore = offset; + std::ignore = size; + std::ignore = pDst; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( + ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, + const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBuffer; + std::ignore = blockingWrite; + std::ignore = offset; + std::ignore = size; + std::ignore = pSrc; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, + size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, + void 
*pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBuffer; + std::ignore = blockingRead; + std::ignore = bufferOrigin; + std::ignore = hostOrigin; + std::ignore = region; + std::ignore = bufferRowPitch; + std::ignore = bufferSlicePitch; + std::ignore = hostRowPitch; + std::ignore = hostSlicePitch; + std::ignore = pDst; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, + size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, + void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBuffer; + std::ignore = blockingWrite; + std::ignore = bufferOrigin; + std::ignore = hostOrigin; + std::ignore = region; + std::ignore = bufferRowPitch; + std::ignore = bufferSlicePitch; + std::ignore = hostRowPitch; + std::ignore = hostSlicePitch; + std::ignore = pSrc; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBufferSrc; + std::ignore = hBufferDst; + std::ignore = srcOffset; + std::ignore = dstOffset; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t 
ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBufferSrc; + std::ignore = hBufferDst; + std::ignore = srcOrigin; + std::ignore = dstOrigin; + std::ignore = region; + std::ignore = srcRowPitch; + std::ignore = srcSlicePitch; + std::ignore = dstRowPitch; + std::ignore = dstSlicePitch; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( + ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, + size_t offset, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hBuffer; + std::ignore = pPattern; + std::ignore = patternSize; + std::ignore = offset; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageRead( + ur_mem_handle_t hImage, bool blockingRead, ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hImage; + std::ignore = blockingRead; + std::ignore = origin; + std::ignore = region; + std::ignore = rowPitch; + std::ignore = slicePitch; + std::ignore = pDst; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t 
ur_queue_immediate_in_order_t::enqueueMemImageWrite( + ur_mem_handle_t hImage, bool blockingWrite, ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hImage; + std::ignore = blockingWrite; + std::ignore = origin; + std::ignore = region; + std::ignore = rowPitch; + std::ignore = slicePitch; + std::ignore = pSrc; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageCopy( + ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hImageSrc; + std::ignore = hImageDst; + std::ignore = srcOrigin; + std::ignore = dstOrigin; + std::ignore = region; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( + ur_mem_handle_t hBuffer, bool blockingMap, ur_map_flags_t mapFlags, + size_t offset, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + void **ppRetMap) { + std::ignore = hBuffer; + std::ignore = blockingMap; + std::ignore = mapFlags; + std::ignore = offset; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = ppRetMap; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( + ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t 
*phEvent) { + std::ignore = hMem; + std::ignore = pMappedPtr; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill( + void *pMem, size_t patternSize, const void *pPattern, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = pMem; + std::ignore = patternSize; + std::ignore = pPattern; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( + bool blocking, void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = blocking; + std::ignore = pDst; + std::ignore = pSrc; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( + const void *pMem, size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = pMem; + std::ignore = size; + std::ignore = flags; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t +ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size, + ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { + std::ignore = pMem; + std::ignore = size; + std::ignore = advice; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill2D( + void *pMem, size_t pitch, size_t 
patternSize, const void *pPattern, + size_t width, size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = pMem; + std::ignore = pitch; + std::ignore = patternSize; + std::ignore = pPattern; + std::ignore = width; + std::ignore = height; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D( + bool blocking, void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = blocking; + std::ignore = pDst; + std::ignore = dstPitch; + std::ignore = pSrc; + std::ignore = srcPitch; + std::ignore = width; + std::ignore = height; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableWrite( + ur_program_handle_t hProgram, const char *name, bool blockingWrite, + size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hProgram; + std::ignore = name; + std::ignore = blockingWrite; + std::ignore = count; + std::ignore = offset; + std::ignore = pSrc; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableRead( + ur_program_handle_t hProgram, const char *name, bool blockingRead, + size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hProgram; + std::ignore = name; + 
std::ignore = blockingRead; + std::ignore = count; + std::ignore = offset; + std::ignore = pDst; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueReadHostPipe( + ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, + void *pDst, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pDst; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueWriteHostPipe( + ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pSrc; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_exp_image_copy_flags_t imageCopyFlags, + ur_rect_offset_t srcOffset, ur_rect_offset_t dstOffset, + ur_rect_region_t copyExtent, ur_rect_region_t hostExtent, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = pDst; + std::ignore = pSrc; + std::ignore = pImageFormat; + std::ignore = pImageDesc; + std::ignore = imageCopyFlags; + std::ignore = srcOffset; + std::ignore = dstOffset; + std::ignore = copyExtent; + std::ignore = 
hostExtent; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t +ur_queue_immediate_in_order_t::bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, + uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hSemaphore; + std::ignore = hasWaitValue; + std::ignore = waitValue; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t +ur_queue_immediate_in_order_t::bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, + uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hSemaphore; + std::ignore = hasSignalValue; + std::ignore = signalValue; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkOffset; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( + bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t 
*phEvent) { + std::ignore = blocking; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueNativeCommandExp( + ur_exp_enqueue_native_command_function_t, void *, uint32_t, + const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +} // namespace v2 diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp new file mode 100644 index 0000000000..2e3553028f --- /dev/null +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -0,0 +1,207 @@ +//===--------- queue_immediate_in_order.hpp - Level Zero Adapter ---------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "../common.hpp" +#include "../queue.hpp" + +#include "ur/ur.hpp" + +namespace v2 { +struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { + ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t, + ur_queue_flags_t); + + ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) override; + ur_result_t queueRetain() override; + ur_result_t queueRelease() override; + ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) override; + ur_result_t queueFinish() override; + ur_result_t queueFlush() override; + ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, + const void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferReadRect( + ur_mem_handle_t hBuffer, bool 
blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, + size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, + size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite, + 
ur_rect_offset_t origin, + ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + void **ppRetMap) override; + ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill(void *pMem, size_t patternSize, + const void *pPattern, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t, + size_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t, + size_t, size_t, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) override; + ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size, + ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMAdvise(const void *pMem, size_t size, + 
ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableWrite( + ur_program_handle_t hProgram, const char *name, bool blockingWrite, + size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueDeviceGlobalVariableRead( + ur_program_handle_t hProgram, const char *name, bool blockingRead, + size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueWriteHostPipe(ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesImageCopyExp( + void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesWaitExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, + uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t bindlessImagesSignalExternalSemaphoreExp( + ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, + uint64_t signalValue, uint32_t numEventsInWaitList, + const 
ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueCooperativeKernelLaunchExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t enqueueKernelLaunchCustomExp( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; + ur_result_t + enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, + uint32_t, const ur_mem_handle_t *, + const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) override; +}; + +} // namespace v2