@@ -25,18 +25,31 @@ struct TransferLoop
2525
2626 // IOTA: Use the index as the fetching offset
2727 // Non IOTA: Read the address buffer ("index buffer") to select fetching offset
28- const uint64_t srcAddressBufferOffset = SrcIndexIota ? srcOffset : vk::RawBufferLoad<uint32_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint32_t));
29- const uint64_t dstAddressBufferOffset = DstIndexIota ? dstOffset : vk::RawBufferLoad<uint32_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint32_t));
28+ uint64_t srcAddressBufferOffset;
29+ uint64_t dstAddressBufferOffset;
30+
31+ if (SrcIndexIota) srcAddressBufferOffset = srcOffset;
32+ else
33+ {
34+ if (SrcIndexSizeLog2 == 0 ) {} // we can't read individual bytes
35+ else if (SrcIndexSizeLog2 == 1 ) srcAddressBufferOffset = vk::RawBufferLoad<uint16_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint16_t));
36+ else if (SrcIndexSizeLog2 == 2 ) srcAddressBufferOffset = vk::RawBufferLoad<uint32_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint32_t));
37+ else if (SrcIndexSizeLog2 == 3 ) srcAddressBufferOffset = vk::RawBufferLoad<uint64_t>(transferRequest.srcIndexAddr + srcOffset * sizeof (uint64_t));
38+ }
39+
40+ if (DstIndexIota) dstAddressBufferOffset = dstOffset;
41+ else
42+ {
43+ if (DstIndexSizeLog2 == 0 ) {} // we can't read individual bytes
44+ else if (DstIndexSizeLog2 == 1 ) dstAddressBufferOffset = vk::RawBufferLoad<uint16_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint16_t));
45+ else if (DstIndexSizeLog2 == 2 ) dstAddressBufferOffset = vk::RawBufferLoad<uint32_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint32_t));
46+ else if (DstIndexSizeLog2 == 3 ) dstAddressBufferOffset = vk::RawBufferLoad<uint64_t>(transferRequest.dstIndexAddr + dstOffset * sizeof (uint64_t));
47+ }
3048
3149 const uint64_t srcAddressMapped = transferRequest.srcAddr + srcAddressBufferOffset * srcIndexSize;
3250 const uint64_t dstAddressMapped = transferRequest.dstAddr + dstAddressBufferOffset * dstIndexSize;
3351
34- //vk::RawBufferStore<uint64_t>(transferRequest.dstAddr + invocationIndex * sizeof(uint64_t) * 2, srcAddressMapped,8);
35- //vk::RawBufferStore<uint64_t>(transferRequest.dstAddr + invocationIndex * sizeof(uint64_t) * 2 + sizeof(uint64_t), dstAddressMapped,8);
36- if (SrcIndexSizeLog2 == 0 ) {} // we can't write individual bytes
37- else if (SrcIndexSizeLog2 == 1 ) vk::RawBufferStore<uint16_t>(dstAddressMapped, vk::RawBufferLoad<uint16_t>(srcAddressMapped));
38- else if (SrcIndexSizeLog2 == 2 ) vk::RawBufferStore<uint32_t>(dstAddressMapped, vk::RawBufferLoad<uint32_t>(srcAddressMapped));
39- else if (SrcIndexSizeLog2 == 3 ) vk::RawBufferStore<uint64_t>(dstAddressMapped, vk::RawBufferLoad<uint64_t>(srcAddressMapped));
52+ vk::RawBufferStore<uint32_t>(dstAddressMapped, vk::RawBufferLoad<uint32_t>(srcAddressMapped));
4053 }
4154
4255 void copyLoop (NBL_CONST_REF_ARG (TransferDispatchInfo) dispatchInfo, uint baseInvocationIndex, uint propertyId, TransferRequest transferRequest, uint dispatchSize)
0 commit comments