From a6e8ab57a65e07f299c128b9eb6c2821989db77e Mon Sep 17 00:00:00 2001 From: Frederick Roy Date: Tue, 17 Mar 2026 14:24:13 +0900 Subject: [PATCH] add benchmarks on tovec free functions --- src/benchmarks/Sofa.Type/Vec.cpp | 116 ++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) diff --git a/src/benchmarks/Sofa.Type/Vec.cpp b/src/benchmarks/Sofa.Type/Vec.cpp index daac2e8..fb1db13 100644 --- a/src/benchmarks/Sofa.Type/Vec.cpp +++ b/src/benchmarks/Sofa.Type/Vec.cpp @@ -21,12 +21,18 @@ static void BM_Vec_InitMemset(benchmark::State& state); static void BM_Vec_CopySimpleEqual(benchmark::State& state); static void BM_Vec_CopySimpleLoop(benchmark::State& state); static void BM_Vec_CopyStdCopy(benchmark::State& state); +/* Forward declarations of the toVecN conversion benchmarks that are registered and defined later in this patch. */ template +static void BM_Vec_ConvertToVecNoop(benchmark::State& state); +template +static void BM_Vec_ConvertToVecIdentical(benchmark::State& state); +template +static void BM_Vec_ConvertToVecDifferent(benchmark::State& state); using stdarray3f = std::array; using sofatypefixedarray3f = sofa::type::fixed_array; -constexpr int64_t minSubIterations = 8 << 4; -constexpr int64_t maxSubIterations = 8 << 6; +/* Lower and upper bound of the argument sweep handed to ->Ranges() by the registrations below: 8 << 14 is 131072 and 8 << 16 is 524288. */ constexpr int64_t minSubIterations = 8 << 14; +constexpr int64_t maxSubIterations = 8 << 16; BENCHMARK(BM_Vec_dot)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); BENCHMARK_TEMPLATE1(BM_Vec_stdinnerproduct, stdarray3f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); @@ -37,6 +43,21 @@ BENCHMARK(BM_Vec_InitMemset)->RangeMultiplier(2)->Ranges({ {minSubIterations, ma BENCHMARK(BM_Vec_CopyStdCopy)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); BENCHMARK(BM_Vec_CopySimpleLoop)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); BENCHMARK(BM_Vec_CopySimpleEqual)->RangeMultiplier(2)->Ranges({ {minSubIterations, 
maxSubIterations} })->Unit(benchmark::kMicrosecond); +/* Registrations of the toVecN benchmarks; like the existing ones they use RangeMultiplier(2) over {minSubIterations, maxSubIterations} and report in microseconds. */ BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecNoop, sofa::type::Vec3f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecNoop, sofa::type::Vec3d)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecNoop, sofa::type::Vec6f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecNoop, sofa::type::Vec6d)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecIdentical, sofa::type::Vec3f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecIdentical, sofa::type::Vec3d)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecIdentical, sofa::type::Vec6f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecIdentical, sofa::type::Vec6d)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE1(BM_Vec_ConvertToVecIdentical, sofa::type::Vec3i)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE2(BM_Vec_ConvertToVecDifferent, sofa::type::Vec3d, sofa::type::Vec3f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); +BENCHMARK_TEMPLATE2(BM_Vec_ConvertToVecDifferent, sofa::type::Vec3f, sofa::type::Vec3d)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond); 
+BENCHMARK_TEMPLATE2(BM_Vec_ConvertToVecDifferent, sofa::type::Vec3d, sofa::type::Vec6d)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond);
+BENCHMARK_TEMPLATE2(BM_Vec_ConvertToVecDifferent, sofa::type::Vec3d, sofa::type::Vec6f)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond);
+BENCHMARK_TEMPLATE2(BM_Vec_ConvertToVecDifferent, sofa::type::Vec3d, sofa::type::Vec3i)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond);
+BENCHMARK_TEMPLATE2(BM_Vec_ConvertToVecDifferent, sofa::type::Vec3d, sofa::type::Vec6i)->RangeMultiplier(2)->Ranges({ {minSubIterations, maxSubIterations} })->Unit(benchmark::kMicrosecond);
 
 // test on dot() for sofa::type::Vec
 void BM_Vec_dot(benchmark::State& state)
@@ -211,3 +232,108 @@ void BM_Vec_CopySimpleEqual(benchmark::State& state)
         b = list;
     }
 }
+
+// Appends one element to `list`, constructed in place from ContainerSize consecutive
+// entries of `values` starting at offset i * ContainerSize.
+// NOTE(review): template parameter/argument lists in this hunk appear to have been lost
+// (bare `template`, `std::index_sequence`, ...); restore them before applying.
+template
+void emplace_from_values(VectorContainer& list, const typename Container::value_type* values, std::size_t i)
+{
+    constexpr auto ContainerSize = Container::static_size;
+
+    // Build an index sequence 0, 1, ..., ContainerSize-1
+    [&](std::index_sequence) {
+        list.emplace_back(values[i * ContainerSize + Is]...);
+    }(std::make_index_sequence{});
+}
+
+// Reference loop for the toVecN benchmarks: sums the first component of every element of
+// `list`; no conversion is performed inside the timed loop.
+template
+void BM_Vec_ConvertToVecNoop(benchmark::State& state)
+{
+    constexpr auto ContainerSize = Container::static_size;
+    constexpr auto totalsize = maxSubIterations * ContainerSize;
+    const auto& values = RandomValuePool::get();
+
+    sofa::type::vector list;
+    list.reserve(state.range(0));
+
+    for (unsigned int i = 0; i < state.range(0); i++)
+    {
+        // Pass the loop index so each element is built from its own slice of the pool
+        // (assumes the pool holds at least totalsize values -- TODO confirm).
+        emplace_from_values(list, values.data(), i);
+    }
+
+    for (auto _ : state)
+    {
+        typename Container::value_type sum{};
+        for (unsigned int i = 0; i < state.range(0); ++i)
+        {
+            sum += list[i][0];
+        }
+        benchmark::DoNotOptimize(sum);
+    }
+}
+
+// Same timed loop as BM_Vec_ConvertToVecNoop, except every element first goes through
+// sofa::type::toVecN before its first component is added to the sum.
+template
+void BM_Vec_ConvertToVecIdentical(benchmark::State& state)
+{
+    constexpr auto ContainerSize = Container::static_size;
+    constexpr auto totalsize = maxSubIterations * ContainerSize;
+    const auto& values = RandomValuePool::get();
+
+    sofa::type::vector list;
+    list.reserve(state.range(0));
+
+    for (unsigned int i = 0; i < state.range(0); i++)
+    {
+        // Element i is built from the i-th slice of the pool.
+        emplace_from_values(list, values.data(), i);
+    }
+
+    for (auto _ : state)
+    {
+        typename Container::value_type sum{};
+        for (unsigned int i = 0; i < state.range(0); ++i)
+        {
+            const auto v = sofa::type::toVecN(list[i]);
+            sum += v[0];
+        }
+        benchmark::DoNotOptimize(sum);
+    }
+}
+
+// Builds `list` from FromContainer elements, passes each through sofa::type::toVecN and
+// accumulates the first component of the result as ToContainer::value_type.
+template
+void BM_Vec_ConvertToVecDifferent(benchmark::State& state)
+{
+    constexpr auto FromContainerSize = FromContainer::static_size;
+    constexpr auto totalsize = maxSubIterations * FromContainerSize;
+    const auto& values = RandomValuePool::get();
+
+    sofa::type::vector list;
+    list.reserve(state.range(0));
+
+    for (unsigned int i = 0; i < state.range(0); i++)
+    {
+        // Element i is built from the i-th slice of the pool.
+        emplace_from_values(list, values.data(), i);
+    }
+
+    for (auto _ : state)
+    {
+        typename ToContainer::value_type sum{};
+        for (unsigned int i = 0; i < state.range(0); ++i)
+        {
+            const auto v = sofa::type::toVecN(list[i]);
+            sum += v[0];
+        }
+        benchmark::DoNotOptimize(sum);
+    }
+}