Skip to content

Commit 41f8bc5

Browse files
[GpuOclRuntime] Retain input and release created cl_events (#367)
Call clRetainEvent() for each event passed to OclContext and clReleaseEvent() for each created event. Also, minor refactoring of the events management code.
1 parent 6074df6 commit 41f8bc5

File tree

2 files changed

+74
-36
lines changed

2 files changed

+74
-36
lines changed

include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h

+13-28
Original file line numberDiff line numberDiff line change
@@ -149,33 +149,27 @@ static constexpr auto ZERO_PTR = const_cast<int64_t *>(&ZERO);
149149
struct OclContext {
150150
const OclRuntime &runtime;
151151
const cl_command_queue queue;
152-
// Preserve the execution order. This is required in case of out-of-order
153-
// execution (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE). When the execution
154-
// is completed, the 'lastEvent' field contains the event of the last enqueued
155-
// command. If this field is false, 'waitList' is ignored.
156-
const bool preserveOrder;
152+
// Create 'cl_event' object, for each enqueued command, that can be used to
153+
// query or wait for the command to complete. This is required in case of
154+
// out-of-order execution (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), but can
155+
// also be used to get the last event. When the execution is completed, the
156+
// 'lastEvent' field contains the event of the last enqueued command. If this
157+
// field is false, 'waitList' is ignored.
158+
const bool createEvents;
157159
cl_uint waitListLen;
158160
cl_event *waitList;
159161
cl_event lastEvent;
160162

161-
explicit OclContext(const OclRuntime &runtime, cl_command_queue queue,
162-
cl_uint waitListLen = 0, cl_event *waitList = nullptr)
163-
: OclContext(runtime, queue, OclRuntime::isOutOfOrder(queue), waitListLen,
164-
waitList) {}
163+
explicit OclContext(const OclRuntime &runtime, cl_command_queue queue)
164+
: OclContext(runtime, queue, OclRuntime::isOutOfOrder(queue)) {}
165165

166166
explicit OclContext(const OclRuntime &runtime, cl_command_queue queue,
167-
bool preserveOrder, cl_uint waitListLen,
168-
cl_event *waitList)
169-
: runtime(runtime), queue(queue), preserveOrder(preserveOrder),
170-
waitListLen(preserveOrder ? waitListLen : 0),
171-
waitList(preserveOrder ? waitList : nullptr), lastEvent(nullptr),
172-
clPtrs(nullptr) {
173-
assert(!OclRuntime::isOutOfOrder(queue) || preserveOrder);
174-
assert(preserveOrder || (waitListLen == 0 && waitList == nullptr));
175-
}
167+
bool createEvents, cl_uint waitListLen = 0,
168+
cl_event *waitList = nullptr);
176169

177170
OclContext(const OclContext &) = delete;
178171
OclContext &operator=(const OclContext &) = delete;
172+
~OclContext();
179173

180174
[[nodiscard]] llvm::Expected<bool> finish();
181175

@@ -186,16 +180,7 @@ struct OclContext {
186180
template <unsigned N> friend struct StaticExecutor;
187181
std::unordered_set<void *> *clPtrs;
188182

189-
void setLastEvent(cl_event event) {
190-
lastEvent = event;
191-
if (event) {
192-
waitListLen = 1;
193-
waitList = &lastEvent;
194-
} else {
195-
waitListLen = 0;
196-
waitList = nullptr;
197-
}
198-
}
183+
void setLastEvent(cl_event event);
199184
};
200185

201186
struct OclModule {

lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp

+61-8
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ struct OclRuntime::Exports {
295295
}
296296
va_end(args);
297297

298-
if (ctx->preserveOrder) {
298+
if (ctx->createEvents) {
299299
cl_event event = nullptr;
300300
err = clEnqueueNDRangeKernel(ctx->queue, cloned.kernel, 3, nullptr,
301301
kernel->globalSize, kernel->localSize,
@@ -541,7 +541,7 @@ llvm::Expected<bool> OclRuntime::usmFree(const void *ptr) const {
541541
llvm::Expected<bool> OclRuntime::usmCpy(OclContext &ctx, const void *src,
542542
void *dst, size_t size) const {
543543
cl_int err;
544-
if (ctx.preserveOrder) {
544+
if (ctx.createEvents) {
545545
cl_event event;
546546
err = ext.clEnqueueMemcpyINTEL(ctx.queue, false, dst, src, size,
547547
ctx.waitListLen, ctx.waitList, &event);
@@ -572,16 +572,69 @@ void OclRuntime::debug(const char *file, int line, const char *msg) {
572572
}
573573
#endif
574574

575+
OclContext::OclContext(const OclRuntime &runtime, cl_command_queue queue,
576+
bool createEvents, cl_uint waitListLen,
577+
cl_event *waitList)
578+
: runtime(runtime), queue(queue), createEvents(createEvents),
579+
waitListLen(createEvents ? waitListLen : 0),
580+
waitList(createEvents ? waitList : nullptr), lastEvent(nullptr),
581+
clPtrs(nullptr) {
582+
assert(!OclRuntime::isOutOfOrder(queue) || createEvents);
583+
assert(createEvents || (waitListLen == 0 && waitList == nullptr));
584+
for (cl_uint i = 0; i < waitListLen; i++) {
585+
gcLogD("Retaining OpenCL event: ", waitList[i]);
586+
CL_CHECKR(clRetainEvent(waitList[i]),
587+
"Failed to retain OpenCL event: ", waitList[i]);
588+
}
589+
}
590+
591+
OclContext::~OclContext() {
592+
for (cl_uint i = 0; i < waitListLen; i++) {
593+
gcLogD("Releasing OpenCL event: ", waitList[i]);
594+
CL_CHECKR(clReleaseEvent(waitList[i]),
595+
"Failed to release OpenCL event: ", waitList[i]);
596+
}
597+
}
598+
575599
llvm::Expected<bool> OclContext::finish() {
576-
gcLogD("Waiting for the enqueued OpenCL commands to finish: ", queue);
577-
CL_CHECK(clFinish(queue),
578-
"Failed to finish the OpenCL command queue: ", queue);
579-
if (preserveOrder) {
600+
if (createEvents) {
601+
if (waitListLen) {
602+
gcLogD("Waiting for ", waitListLen, " OpenCL events to finish.");
603+
CL_CHECK(clWaitForEvents(waitListLen, waitList),
604+
"Failed to wait for OpenCL events.");
605+
606+
for (cl_uint i = 0; i < waitListLen; i++) {
607+
gcLogD("Releasing OpenCL event: ", waitList[i]);
608+
CL_CHECK(clReleaseEvent(waitList[i]),
609+
"Failed to release OpenCL event: ", waitList[i]);
610+
}
611+
waitListLen = 0;
612+
waitList = nullptr;
613+
}
614+
} else {
615+
gcLogD("Waiting for the enqueued OpenCL commands to finish: ", queue);
616+
CL_CHECK(clFinish(queue),
617+
"Failed to finish the OpenCL command queue: ", queue);
618+
}
619+
return true;
620+
}
621+
622+
void OclContext::setLastEvent(cl_event event) {
623+
for (cl_uint i = 0; i < waitListLen; i++) {
624+
gcLogD("Releasing OpenCL event: ", waitList[i]);
625+
CL_CHECKR(clReleaseEvent(waitList[i]),
626+
"Failed to release OpenCL event: ", waitList[i]);
627+
}
628+
629+
gcLogD("Setting the last OpenCL event: ", event);
630+
lastEvent = event;
631+
if (event) {
632+
waitListLen = 1;
633+
waitList = &lastEvent;
634+
} else {
580635
waitListLen = 0;
581636
waitList = nullptr;
582-
lastEvent = nullptr;
583637
}
584-
return true;
585638
}
586639

587640
OclModule::~OclModule() {

0 commit comments

Comments
 (0)