@@ -1293,6 +1293,303 @@ TEST_P(
12931293 lzt::destroy_function (mulKernel);
12941294}
12951295
1296+ TEST_F (
1297+ zeMutableCommandListTests,
1298+ GivenMutationOfMultipleKernelCapabilitiesAndEventsWhenCommandListIsClosedThenEverythingIsUpdatedCorrectly) {
1299+ if (!CheckExtensionSupport (ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1) ||
1300+ !signalEventSupport || !waitEventsSupport || !kernelInstructionSupport ||
1301+ !globalOffsetSupport || !groupCountSupport || !groupSizeSupport ||
1302+ !kernelArgumentsSupport) {
1303+ GTEST_SKIP () << " Not all required extensions are supported" ;
1304+ }
1305+
1306+ const int32_t buffer_size = 16384 ;
1307+ const int32_t init_buffer_val = 100 ;
1308+ const int32_t add_val = 20 ;
1309+ const int32_t mul_val = 30 ;
1310+ const int32_t sub_val = 40 ;
1311+ const int32_t div_val = 4 ;
1312+ const int32_t global_offset_x = 5 ;
1313+ const int32_t mutated_global_offset_x = 5 ;
1314+ const int32_t mutated_mul_val = 60 ;
1315+ const int32_t mutated_sub_val = 80 ;
1316+ const int32_t part_of_buffer_to_fill_1 = 2 ;
1317+ const int32_t part_of_buffer_to_fill_2 = 8 ;
1318+
1319+ lzt::zeEventPool event_pool;
1320+ const uint32_t events_number = 4 ;
1321+ std::vector<ze_event_handle_t > events (events_number, nullptr );
1322+ event_pool.InitEventPool (context, events_number,
1323+ ZE_EVENT_POOL_FLAG_HOST_VISIBLE);
1324+ event_pool.create_events (events, events_number);
1325+
1326+ int32_t *in_out_buffer_1 = reinterpret_cast <int32_t *>(
1327+ lzt::allocate_host_memory (buffer_size * sizeof (int32_t )));
1328+ int32_t *in_out_buffer_2 = reinterpret_cast <int32_t *>(
1329+ lzt::allocate_host_memory (buffer_size * sizeof (int32_t )));
1330+ for (size_t i = 0 ; i < buffer_size; i++) {
1331+ in_out_buffer_1[i] = init_buffer_val;
1332+ in_out_buffer_2[i] = init_buffer_val;
1333+ }
1334+
1335+ uint32_t group_size_x = 0 ;
1336+ uint32_t group_size_y = 0 ;
1337+ uint32_t group_size_z = 0 ;
1338+
1339+ ze_kernel_handle_t add_kernel = lzt::create_function (module , " addValue" );
1340+ ze_kernel_handle_t mul_kernel = lzt::create_function (module , " mulValue" );
1341+ ze_kernel_handle_t sub_kernel = lzt::create_function (module , " subValue" );
1342+ ze_kernel_handle_t div_kernel = lzt::create_function (module , " divValue" );
1343+
1344+ uint64_t kernel_command_id_1 = 0 ;
1345+ uint64_t kernel_command_id_2 = 0 ;
1346+ uint64_t kernel_command_id_3 = 0 ;
1347+ std::vector<ze_kernel_handle_t > kernels{add_kernel, mul_kernel, sub_kernel,
1348+ div_kernel};
1349+ commandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS |
1350+ ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT |
1351+ ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE |
1352+ ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT |
1353+ ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS |
1354+ ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION |
1355+ ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET;
1356+
1357+ lzt::suggest_group_size (add_kernel, buffer_size, 1 , 1 , group_size_x,
1358+ group_size_y, group_size_z);
1359+ const uint32_t mutated_group_size_x = group_size_x / 2 ;
1360+
1361+ ze_group_count_t group_count{
1362+ buffer_size / group_size_x / part_of_buffer_to_fill_1, 1 , 1 };
1363+ ze_group_count_t mutated_group_count{
1364+ buffer_size / mutated_group_size_x / part_of_buffer_to_fill_2, 1 , 1 };
1365+
1366+ // 1 add_kernel
1367+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1368+ zeKernelSetGlobalOffsetExp (add_kernel, global_offset_x, 0 , 0 ));
1369+ lzt::set_group_size (add_kernel, group_size_x, group_size_y, group_size_z);
1370+ lzt::set_argument_value (add_kernel, 0 , sizeof (void *), &in_out_buffer_1);
1371+ lzt::set_argument_value (add_kernel, 1 , sizeof (add_val), &add_val);
1372+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1373+ zeCommandListGetNextCommandIdWithKernelsExp (
1374+ mutableCmdList, &commandIdDesc, kernels.size (), kernels.data (),
1375+ &kernel_command_id_1));
1376+ lzt::append_launch_function (mutableCmdList, add_kernel, &group_count,
1377+ events[0 ], 0 , nullptr );
1378+ // 2 mul_kernel
1379+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1380+ zeKernelSetGlobalOffsetExp (mul_kernel, global_offset_x, 0 , 0 ));
1381+ lzt::set_group_size (mul_kernel, group_size_x, group_size_y, group_size_z);
1382+ lzt::set_argument_value (mul_kernel, 0 , sizeof (void *), &in_out_buffer_1);
1383+ lzt::set_argument_value (mul_kernel, 1 , sizeof (mul_val), &mul_val);
1384+
1385+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1386+ zeCommandListGetNextCommandIdWithKernelsExp (
1387+ mutableCmdList, &commandIdDesc, kernels.size (), kernels.data (),
1388+ &kernel_command_id_2));
1389+ lzt::append_launch_function (mutableCmdList, mul_kernel, &group_count,
1390+ events[1 ], 1 , &events[0 ]);
1391+ // 3 sub_kernel
1392+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1393+ zeKernelSetGlobalOffsetExp (sub_kernel, global_offset_x, 0 , 0 ));
1394+ lzt::set_group_size (sub_kernel, group_size_x, group_size_y, group_size_z);
1395+ lzt::set_argument_value (sub_kernel, 0 , sizeof (void *), &in_out_buffer_1);
1396+ lzt::set_argument_value (sub_kernel, 1 , sizeof (sub_val), &sub_val);
1397+
1398+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1399+ zeCommandListGetNextCommandIdWithKernelsExp (
1400+ mutableCmdList, &commandIdDesc, kernels.size (), kernels.data (),
1401+ &kernel_command_id_3));
1402+ lzt::append_launch_function (mutableCmdList, sub_kernel, &group_count, nullptr ,
1403+ 2 , &events[0 ]);
1404+
1405+ lzt::close_command_list (mutableCmdList);
1406+ lzt::execute_command_lists (queue, 1 , &mutableCmdList, nullptr );
1407+ lzt::synchronize (queue, std::numeric_limits<uint64_t >::max ());
1408+
1409+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeEventQueryStatus (events[0 ]));
1410+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeEventQueryStatus (events[1 ]));
1411+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeEventHostReset (events[0 ]));
1412+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeEventHostReset (events[1 ]));
1413+ const uint32_t first_result =
1414+ ((init_buffer_val + add_val) * mul_val) - sub_val;
1415+ for (size_t i = 0 ; i < global_offset_x; i++) {
1416+ EXPECT_EQ (in_out_buffer_1[i], init_buffer_val);
1417+ }
1418+ for (size_t i = global_offset_x;
1419+ i < buffer_size / part_of_buffer_to_fill_1 + global_offset_x; i++) {
1420+ EXPECT_EQ (in_out_buffer_1[i], first_result);
1421+ }
1422+ for (size_t i = buffer_size / part_of_buffer_to_fill_1 + global_offset_x;
1423+ i < buffer_size; i++) {
1424+ EXPECT_EQ (in_out_buffer_1[i], init_buffer_val);
1425+ }
1426+
1427+ // Update events
1428+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1429+ zeCommandListUpdateMutableCommandSignalEventExp (
1430+ mutableCmdList, kernel_command_id_1, events[2 ]));
1431+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1432+ zeCommandListUpdateMutableCommandSignalEventExp (
1433+ mutableCmdList, kernel_command_id_2, events[3 ]));
1434+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1435+ zeCommandListUpdateMutableCommandWaitEventsExp (
1436+ mutableCmdList, kernel_command_id_2, 1 , &events[2 ]));
1437+ EXPECT_EQ (ZE_RESULT_SUCCESS,
1438+ zeCommandListUpdateMutableCommandWaitEventsExp (
1439+ mutableCmdList, kernel_command_id_3, 2 , &events[2 ]));
1440+
1441+ // Change kernels sequence from add, mul, sub to mul, sub, div
1442+ std::vector<uint64_t > commandIds{kernel_command_id_1, kernel_command_id_2,
1443+ kernel_command_id_3};
1444+ std::vector<ze_kernel_handle_t > newSequenceOfKernels{mul_kernel, sub_kernel,
1445+ div_kernel};
1446+
1447+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeCommandListUpdateMutableCommandKernelsExp (
1448+ mutableCmdList, 3 , commandIds.data (),
1449+ newSequenceOfKernels.data ()));
1450+
1451+ // Mutate invalidated data for kernel 1
1452+ ze_mutable_global_offset_exp_desc_t mutate_global_offset = {
1453+ ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC};
1454+ mutate_global_offset.commandId = kernel_command_id_1;
1455+ mutate_global_offset.offsetX = mutated_global_offset_x;
1456+ mutate_global_offset.offsetY = 0 ;
1457+ mutate_global_offset.offsetZ = 0 ;
1458+ mutate_global_offset.pNext = nullptr ;
1459+ ze_mutable_group_count_exp_desc_t mutate_group_count{
1460+ ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC};
1461+ mutate_group_count.commandId = kernel_command_id_1;
1462+ mutate_group_count.pGroupCount = &mutated_group_count;
1463+ mutate_group_count.pNext = &mutate_global_offset;
1464+ ze_mutable_group_size_exp_desc_t mutate_group_size{
1465+ ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC};
1466+ mutate_group_size.commandId = kernel_command_id_1;
1467+ mutate_group_size.groupSizeX = mutated_group_size_x;
1468+ mutate_group_size.groupSizeY = group_size_y;
1469+ mutate_group_size.groupSizeZ = group_size_z;
1470+ mutate_group_size.pNext = &mutate_group_count;
1471+ ze_mutable_kernel_argument_exp_desc_t mutate_buffer_kernel_arg{
1472+ ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC};
1473+ mutate_buffer_kernel_arg.commandId = kernel_command_id_1;
1474+ mutate_buffer_kernel_arg.argIndex = 0 ;
1475+ mutate_buffer_kernel_arg.argSize = sizeof (void *);
1476+ mutate_buffer_kernel_arg.pArgValue = &in_out_buffer_2;
1477+ mutate_buffer_kernel_arg.pNext = &mutate_group_size;
1478+ ze_mutable_kernel_argument_exp_desc_t mutate_scalar_kernel_arg{
1479+ ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC};
1480+ mutate_scalar_kernel_arg.commandId = kernel_command_id_1;
1481+ mutate_scalar_kernel_arg.argIndex = 1 ;
1482+ mutate_scalar_kernel_arg.argSize = sizeof (mutated_mul_val);
1483+ mutate_scalar_kernel_arg.pArgValue = &mutated_mul_val;
1484+ mutate_scalar_kernel_arg.pNext = &mutate_buffer_kernel_arg;
1485+
1486+ // Mutate invalidated data for kernel 2
1487+ ze_mutable_global_offset_exp_desc_t mutate_global_offset_2 = {
1488+ ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC};
1489+ mutate_global_offset_2.commandId = kernel_command_id_2;
1490+ mutate_global_offset_2.offsetX = mutated_global_offset_x;
1491+ mutate_global_offset_2.offsetY = 0 ;
1492+ mutate_global_offset_2.offsetZ = 0 ;
1493+ mutate_global_offset_2.pNext = &mutate_scalar_kernel_arg;
1494+ ze_mutable_group_count_exp_desc_t mutate_group_count_2{
1495+ ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC};
1496+ mutate_group_count_2.commandId = kernel_command_id_2;
1497+ mutate_group_count_2.pGroupCount = &mutated_group_count;
1498+ mutate_group_count_2.pNext = &mutate_global_offset_2;
1499+ ze_mutable_group_size_exp_desc_t mutate_group_size_2{
1500+ ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC};
1501+ mutate_group_size_2.commandId = kernel_command_id_2;
1502+ mutate_group_size_2.groupSizeX = mutated_group_size_x;
1503+ mutate_group_size_2.groupSizeY = group_size_y;
1504+ mutate_group_size_2.groupSizeZ = group_size_z;
1505+ mutate_group_size_2.pNext = &mutate_group_count_2;
1506+ ze_mutable_kernel_argument_exp_desc_t mutate_buffer_kernel_arg_2{
1507+ ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC};
1508+ mutate_buffer_kernel_arg_2.commandId = kernel_command_id_2;
1509+ mutate_buffer_kernel_arg_2.argIndex = 0 ;
1510+ mutate_buffer_kernel_arg_2.argSize = sizeof (void *);
1511+ mutate_buffer_kernel_arg_2.pArgValue = &in_out_buffer_2;
1512+ mutate_buffer_kernel_arg_2.pNext = &mutate_group_size_2;
1513+ ze_mutable_kernel_argument_exp_desc_t mutate_scalar_kernel_arg_2{
1514+ ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC};
1515+ mutate_scalar_kernel_arg_2.commandId = kernel_command_id_2;
1516+ mutate_scalar_kernel_arg_2.argIndex = 1 ;
1517+ mutate_scalar_kernel_arg_2.argSize = sizeof (mutated_sub_val);
1518+ mutate_scalar_kernel_arg_2.pArgValue = &mutated_sub_val;
1519+ mutate_scalar_kernel_arg_2.pNext = &mutate_buffer_kernel_arg_2;
1520+
1521+ // Mutate invalidated data for kernel 3
1522+ ze_mutable_global_offset_exp_desc_t mutate_global_offset_3 = {
1523+ ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC};
1524+ mutate_global_offset_3.commandId = kernel_command_id_3;
1525+ mutate_global_offset_3.offsetX = mutated_global_offset_x;
1526+ mutate_global_offset_3.offsetY = 0 ;
1527+ mutate_global_offset_3.offsetZ = 0 ;
1528+ mutate_global_offset_3.pNext = &mutate_scalar_kernel_arg_2;
1529+ ze_mutable_group_count_exp_desc_t mutate_group_count_3{
1530+ ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC};
1531+ mutate_group_count_3.commandId = kernel_command_id_3;
1532+ mutate_group_count_3.pGroupCount = &mutated_group_count;
1533+ mutate_group_count_3.pNext = &mutate_global_offset_3;
1534+ ze_mutable_group_size_exp_desc_t mutate_group_size_3{
1535+ ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC};
1536+ mutate_group_size_3.commandId = kernel_command_id_3;
1537+ mutate_group_size_3.groupSizeX = mutated_group_size_x;
1538+ mutate_group_size_3.groupSizeY = group_size_y;
1539+ mutate_group_size_3.groupSizeZ = group_size_z;
1540+ mutate_group_size_3.pNext = &mutate_group_count_3;
1541+ ze_mutable_kernel_argument_exp_desc_t mutate_buffer_kernel_arg_3{
1542+ ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC};
1543+ mutate_buffer_kernel_arg_3.commandId = kernel_command_id_3;
1544+ mutate_buffer_kernel_arg_3.argIndex = 0 ;
1545+ mutate_buffer_kernel_arg_3.argSize = sizeof (void *);
1546+ mutate_buffer_kernel_arg_3.pArgValue = &in_out_buffer_2;
1547+ mutate_buffer_kernel_arg_3.pNext = &mutate_group_size_3;
1548+ ze_mutable_kernel_argument_exp_desc_t mutate_scalar_kernel_arg_3{
1549+ ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC};
1550+ mutate_scalar_kernel_arg_3.commandId = kernel_command_id_3;
1551+ mutate_scalar_kernel_arg_3.argIndex = 1 ;
1552+ mutate_scalar_kernel_arg_3.argSize = sizeof (div_val);
1553+ mutate_scalar_kernel_arg_3.pArgValue = &div_val;
1554+ mutate_scalar_kernel_arg_3.pNext = &mutate_buffer_kernel_arg_3;
1555+
1556+ mutableCmdDesc.pNext = &mutate_scalar_kernel_arg_3;
1557+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeCommandListUpdateMutableCommandsExp (
1558+ mutableCmdList, &mutableCmdDesc));
1559+
1560+ lzt::close_command_list (mutableCmdList);
1561+ lzt::execute_command_lists (queue, 1 , &mutableCmdList, nullptr );
1562+ lzt::synchronize (queue, std::numeric_limits<uint64_t >::max ());
1563+
1564+ EXPECT_EQ (ZE_RESULT_NOT_READY, zeEventQueryStatus (events[0 ]));
1565+ EXPECT_EQ (ZE_RESULT_NOT_READY, zeEventQueryStatus (events[1 ]));
1566+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeEventQueryStatus (events[2 ]));
1567+ EXPECT_EQ (ZE_RESULT_SUCCESS, zeEventQueryStatus (events[3 ]));
1568+ const uint32_t second_result =
1569+ ((init_buffer_val * mutated_mul_val) - mutated_sub_val) / div_val;
1570+ for (size_t i = 0 ; i < mutated_global_offset_x; i++) {
1571+ EXPECT_EQ (in_out_buffer_2[i], init_buffer_val);
1572+ }
1573+ for (size_t i = mutated_global_offset_x;
1574+ i < buffer_size / part_of_buffer_to_fill_2 + mutated_global_offset_x;
1575+ i++) {
1576+ EXPECT_EQ (in_out_buffer_2[i], second_result);
1577+ }
1578+ for (size_t i =
1579+ buffer_size / part_of_buffer_to_fill_2 + mutated_global_offset_x;
1580+ i < buffer_size; i++) {
1581+ EXPECT_EQ (in_out_buffer_2[i], init_buffer_val);
1582+ }
1583+
1584+ event_pool.destroy_events (events);
1585+ lzt::free_memory (in_out_buffer_1);
1586+ lzt::free_memory (in_out_buffer_2);
1587+ lzt::destroy_function (add_kernel);
1588+ lzt::destroy_function (mul_kernel);
1589+ lzt::destroy_function (sub_kernel);
1590+ lzt::destroy_function (div_kernel);
1591+ }
1592+
12961593INSTANTIATE_TEST_SUITE_P (
12971594 zeMutableCommandListTests, zeMutableCommandListTestsEvents,
12981595 testing::Values (ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
0 commit comments