Skip to content

performance: use reserve() to reduce the cost of insertions #793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -280,6 +280,7 @@ void ZebinDecoder<numBits>::dumpSymtab(ElfT &elf, ArrayRef<const uint8_t> symtab
template <Elf::ElfIdentifierClass numBits>
std::vector<SectionInfo> ZebinDecoder<numBits>::dumpElfSections(ElfT &elf) {
std::vector<SectionInfo> sectionInfos;
sectionInfos.reserve(elf.sectionHeaders.size() - 1U);
for (size_t secId = 1U; secId < elf.sectionHeaders.size(); secId++) {
auto &[header, data] = elf.sectionHeaders[secId];
auto sectionName = elf.getSectionName(static_cast<uint32_t>(secId));
Original file line number Diff line number Diff line change
@@ -27,13 +27,18 @@ SupportedDevicesHelper::SupportedDevicesData SupportedDevicesHelper::collectSupp
SupportedDevicesData data;

// Populate IP Versions, Device Infos, Acronyms
data.deviceIpVersions.reserve(enabledDevices.size());
data.deviceInfos.reserve(enabledDevices.size());
data.acronyms.reserve(enabledDevices.size());
for (const auto &device : enabledDevices) {
data.deviceIpVersions.push_back(device.aotConfig.value);

data.deviceInfos.reserve((*device.deviceIds).size());
for (const auto &deviceId : *device.deviceIds) {
data.deviceInfos.push_back({deviceId, device.aotConfig.revision, device.aotConfig.value});
}

data.acronyms.reserve(device.deviceAcronyms.size());
for (const auto &acronym : device.deviceAcronyms) {
data.acronyms.push_back({acronym.data(), device.aotConfig.value});
}
@@ -44,6 +49,7 @@ SupportedDevicesHelper::SupportedDevicesData SupportedDevicesHelper::collectSupp
for (const auto &device : enabledDevices) {
groupedDevices[device.family].push_back(device.aotConfig.value);
}
data.familyGroups.reserve(groupedDevices.size());
for (const auto &entry : groupedDevices) {
data.familyGroups.push_back({productConfigHelper->getAcronymFromAFamily(entry.first).data(), entry.second});
}
@@ -271,13 +277,15 @@ SupportedDevicesHelper::SupportedDevicesData SupportedDevicesHelper::mergeOclocD
[](const auto &a, const auto &b) { return a.second < b.second; });

// Sort FamilyGroups (alphabetically by group name)
mergedData.familyGroups.reserve(uniqueFamilyGroups.size());
for (const auto &[family, ipVersions] : uniqueFamilyGroups) {
mergedData.familyGroups.push_back({family, std::vector<uint32_t>(ipVersions.begin(), ipVersions.end())});
}
std::sort(mergedData.familyGroups.begin(), mergedData.familyGroups.end(),
[](const auto &a, const auto &b) { return a.first < b.first; });

// Sort ReleaseGroups (alphabetically by group name)
mergedData.releaseGroups.reserve(uniqueReleaseGroups.size());
for (const auto &[release, ipVersions] : uniqueReleaseGroups) {
mergedData.releaseGroups.push_back({release, std::vector<uint32_t>(ipVersions.begin(), ipVersions.end())});
}
1 change: 1 addition & 0 deletions shared/source/aub_mem_dump/aub_mem_dump_pvc_and_later.inl
Original file line number Diff line number Diff line change
@@ -252,6 +252,7 @@ static const MMIOList mmioListVECS = {
static MMIOList mmioListCCSInstance(uint32_t mmioBase) {
MMIOList mmioList;

mmioList.reserve(17);
mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); // GFX_MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); // MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); // RCU_MODE
1 change: 1 addition & 0 deletions shared/source/aub_mem_dump/aub_mem_dump_xehp_and_later.inl
Original file line number Diff line number Diff line change
@@ -234,6 +234,7 @@ static const MMIOList mmioListVECS = {
static MMIOList mmioListCCSInstance(uint32_t mmioBase) {
MMIOList mmioList;

mmioList.reserve(17);
mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); // GFX_MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); // MULT_CTXT_CTL - enable multi-context with 4CCS
mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); // RCU_MODE
1 change: 1 addition & 0 deletions shared/source/command_container/cmdcontainer.cpp
Original file line number Diff line number Diff line change
@@ -525,6 +525,7 @@ void CommandContainer::fillReusableAllocationLists() {
return;
}

this->getResidencyContainer().reserve(amountToFill);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto reserveSize = this->useSecondaryCommandStream ? amountToFill * 2 : amountToFill;

for (auto i = 0u; i < amountToFill; i++) {
auto allocToReuse = obtainNextCommandBufferAllocation();
this->immediateReusableAllocationList->pushTailOne(*allocToReuse);
1 change: 1 addition & 0 deletions shared/source/command_stream/submissions_aggregator.cpp
Original file line number Diff line number Diff line change
@@ -83,6 +83,7 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
totalUsedSize += nextCommandBufferNewResourcesSize;
currentNode->inspectionId = currentInspection;

resourcePackage.reserve(newResources.size());
for (auto &newResource : newResources) {
resourcePackage.push_back(newResource);
}
8 changes: 8 additions & 0 deletions shared/source/compiler_interface/oclc_extensions.cpp
Original file line number Diff line number Diff line change
@@ -26,6 +26,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
openclCFeatures.push_back(openClCFeature);

if (hwInfo.capabilityTable.supportsImages) {
openclCFeatures.reserve(3);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_3d_image_writes");
openclCFeatures.push_back(openClCFeature);

@@ -37,6 +39,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
}

if (hwInfo.capabilityTable.supportsOcl21Features) {
openclCFeatures.reserve(8);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_atomic_order_acq_rel");
openclCFeatures.push_back(openClCFeature);

@@ -62,6 +66,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
openclCFeatures.push_back(openClCFeature);

if (hwInfo.capabilityTable.supportsFloatAtomics) {
openclCFeatures.reserve(8);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp32_global_atomic_add");
openclCFeatures.push_back(openClCFeature);

@@ -102,6 +108,8 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
openclCFeatures.push_back(openClCFeature);

if (hwInfo.capabilityTable.supportsOcl21Features && hwInfo.capabilityTable.supportsFloatAtomics) {
openclCFeatures.reserve(4);

strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp64_global_atomic_add");
openclCFeatures.push_back(openClCFeature);

2 changes: 2 additions & 0 deletions shared/source/device/device.cpp
Original file line number Diff line number Diff line change
@@ -894,6 +894,7 @@ void Device::initializeRayTracing(uint32_t maxBvhLevels) {
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
}

rtDispatchGlobalsInfos.reserve(maxBvhLevels - rtDispatchGlobalsInfos.size() + 1);
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
rtDispatchGlobalsInfos.push_back(nullptr);
}
@@ -1103,6 +1104,7 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
return;
}

dispatchGlobalsInfo->rtStacks.reserve(tileCount);
for (unsigned int tile = 0; tile < tileCount; tile++) {
DeviceBitfield deviceBitfield =
(tileCount == 1)
6 changes: 4 additions & 2 deletions shared/source/device_binary_format/elf/elf_decoder.cpp
Original file line number Diff line number Diff line change
@@ -80,6 +80,7 @@ Elf<numBits> decodeElf(const ArrayRef<const uint8_t> binary, std::string &outErr
}

const ElfProgramHeader<numBits> *programHeader = reinterpret_cast<const ElfProgramHeader<numBits> *>(binary.begin() + ret.elfFileHeader->phOff);
ret.programHeaders.reserve(ret.elfFileHeader->phNum);
for (decltype(ret.elfFileHeader->phNum) i = 0; i < ret.elfFileHeader->phNum; ++i) {
if (programHeader->offset + programHeader->fileSz > binary.size()) {
outErrReason = "Out of bounds program header offset/filesz, program header idx : " + std::to_string(i);
@@ -91,6 +92,7 @@ Elf<numBits> decodeElf(const ArrayRef<const uint8_t> binary, std::string &outErr
}

const ElfSectionHeader<numBits> *sectionHeader = reinterpret_cast<const ElfSectionHeader<numBits> *>(binary.begin() + ret.elfFileHeader->shOff);
ret.sectionHeaders.reserve(ret.elfFileHeader->shNum);
for (decltype(ret.elfFileHeader->shNum) i = 0; i < ret.elfFileHeader->shNum; ++i) {
ArrayRef<const uint8_t> data;
if (SHT_NOBITS != sectionHeader->type) {
@@ -151,7 +153,7 @@ bool Elf<numBits>::decodeRelocations(SectionHeaderAndData<numBits> &sectionHeade
// there may be multiple rela sections, reserve additional size
auto previousEntries = relocations.size();
auto allEntries = previousEntries + numberOfEntries;
relocs.reserve(allEntries);
relocs.reserve(allEntries - previousEntries);

for (auto i = previousEntries; i < allEntries; i++) {

@@ -186,7 +188,7 @@ bool Elf<numBits>::decodeRelocations(SectionHeaderAndData<numBits> &sectionHeade
// there may be multiple rel sections, reserve additional size
auto previousEntries = relocations.size();
auto allEntries = previousEntries + numberOfEntries;
relocs.reserve(allEntries);
relocs.reserve(allEntries - previousEntries);

for (auto i = previousEntries; i < allEntries; i++) {
int symbolIndex = extractSymbolIndex<ElfRel<numBits>>(*reloc);
2 changes: 2 additions & 0 deletions shared/source/device_binary_format/elf/elf_rewriter.h
Original file line number Diff line number Diff line change
@@ -48,9 +48,11 @@ struct ElfRewriter {

ElfRewriter(NEO::Elf::Elf<numBits> &src) {
elfFileHeader = *src.elfFileHeader;
this->sectionHeaders.reserve(src.sectionHeaders.size());
for (const auto &sh : src.sectionHeaders) {
this->sectionHeaders.push_back(std::make_unique<MutableSectionHeader<numBits>>(src.getName(sh.header->name), *sh.header, std::vector<uint8_t>{sh.data.begin(), sh.data.end()}));
}
this->programHeaders.reserve(src.programHeaders.size());
for (const auto &ph : src.programHeaders) {
this->programHeaders.push_back(std::make_unique<MutableProgramHeader<numBits>>(*ph.header, std::vector<uint8_t>{ph.data.begin(), ph.data.end()}));
for (const auto &sh : this->sectionHeaders) {
1 change: 1 addition & 0 deletions shared/source/gen12lp/gfx_core_helper_gen12lp.cpp
Original file line number Diff line number Diff line change
@@ -101,6 +101,7 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
engines.push_back({aub_stream::ENGINE_CCS, EngineUsage::regular});
}

engines.reserve(3);
engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::regular});
engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::lowPriority}); // low priority
engines.push_back({defaultEngine, EngineUsage::internal}); // internal usage
1 change: 1 addition & 0 deletions shared/source/helpers/bindless_heaps_helper.cpp
Original file line number Diff line number Diff line change
@@ -55,6 +55,7 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte
rootDeviceIndex(rootDevice->getRootDeviceIndex()),
deviceBitfield(rootDevice->getDeviceBitfield()) {

ssHeapsAllocations.reserve(BindlesHeapType::numHeapTypes);
for (auto heapType = 0; heapType < BindlesHeapType::numHeapTypes; heapType++) {
auto size = MemoryConstants::pageSize64k;

1 change: 1 addition & 0 deletions shared/source/helpers/gfx_core_helper_xehp_and_later.inl
Original file line number Diff line number Diff line change
@@ -128,6 +128,7 @@ aub_stream::MMIOList GfxCoreHelperHw<GfxFamily>::getExtraMmioList(const Hardware
uint32_t value = 1; // [0] enable
value |= (format << 3); // [3:7] compression_format

mmioList.reserve(3);
mmioList.push_back({0x519C, value});
mmioList.push_back({0xB0F0, value});
mmioList.push_back({0xE4C0, value});
25 changes: 22 additions & 3 deletions shared/source/helpers/l3_range.h
Original file line number Diff line number Diff line change
@@ -106,6 +106,12 @@ inline bool operator!=(const L3Range &lhs, const L3Range &rhs) {
return (false == (lhs == rhs));
}

// Predicate overload set answering, from the static type of its argument alone, whether a
// container is one of the standard containers this header treats as pre-allocatable.
// The argument object is never read, so every overload leaves its parameter unnamed
// (which also keeps unused-parameter warnings out of the build).

// Fallback: selected for any type not matched by a more specialized overload below, so
// the predicate yields false for other containers instead of failing to compile.
template <class ContainerT>
constexpr bool isCanPreallocStlContainer(const ContainerT &) { return false; }

// std::vector with the default allocator.
template <class T>
constexpr bool isCanPreallocStlContainer(const std::vector<T> &) { return true; }

// std::unordered_map with the default hasher, key-equality and allocator.
template <class T1, class T2>
constexpr bool isCanPreallocStlContainer(const std::unordered_map<T1, T2> &) { return true; }

template <typename ContainerT>
inline void coverRangeExactImpl(uint64_t address, uint64_t size, ContainerT &ret, uint64_t policy) {
UNRECOVERABLE_IF(false == L3Range::meetsMinimumAlignment(address));
@@ -114,10 +120,23 @@ inline void coverRangeExactImpl(uint64_t address, uint64_t size, ContainerT &ret
const uint64_t end = address + size;

uint64_t offset = address;

uint64_t maxRangeSizeBySize;
uint64_t maxRangeSizeByOffset;
uint64_t rangeSize;

if constexpr (is_can_prealloc_stl_container(ret)) {
maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
rangeSize = std::min(rangeSize, +L3Range::maxSingleRange);
ret.reserve((end - offset) / rangeSize);
}

while (offset < end) {
uint64_t maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
uint64_t maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
uint64_t rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
rangeSize = std::min(rangeSize, +L3Range::maxSingleRange);
ret.push_back(L3Range::fromAddressSizeWithPolicy(offset, rangeSize, policy));
offset += rangeSize;
1 change: 1 addition & 0 deletions shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp
Original file line number Diff line number Diff line change
@@ -50,6 +50,7 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
EngineInstancesContainer engines;

if (hwInfo.featureTable.flags.ftrCCSNode) {
engines.reserve(hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; i++) {
engines.push_back({static_cast<aub_stream::EngineType>(i + aub_stream::ENGINE_CCS), EngineUsage::regular});
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
1 change: 1 addition & 0 deletions shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp
Original file line number Diff line number Diff line change
@@ -173,6 +173,7 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
auto ailHelper = rootDeviceEnvironment.getAILConfigurationHelper();
auto forceRcs = ailHelper && ailHelper->forceRcs();
if (hwInfo.featureTable.flags.ftrCCSNode && !forceRcs) {
engines.reserve(hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; i++) {
engines.push_back({static_cast<aub_stream::EngineType>(i + aub_stream::ENGINE_CCS), EngineUsage::regular});
}