From 167d3ae483564810b19933e200fb311ea0039ee9 Mon Sep 17 00:00:00 2001
From: Lukas Tenbrink
Date: Thu, 10 Oct 2024 22:16:02 +0200
Subject: [PATCH] Optimize strided loop assignment for runtime-contiguous layout types with contiguous assignment

---
 include/xtensor/core/xassign.hpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/xtensor/core/xassign.hpp b/include/xtensor/core/xassign.hpp
index 904113814..73a659312 100644
--- a/include/xtensor/core/xassign.hpp
+++ b/include/xtensor/core/xassign.hpp
@@ -1135,6 +1135,8 @@ namespace xt
         std::size_t simd_size = inner_loop_size / simd_type::size;
         std::size_t simd_rest = inner_loop_size % simd_type::size;
 
+        bool e1_is_contiguous = E1::contiguous_layout || e1.is_contiguous();
+
         auto fct_stepper = e2.stepper_begin(e1.shape());
         auto res_stepper = e1.stepper_begin(e1.shape());
 
@@ -1185,7 +1187,7 @@ namespace xt
             fct_stepper.to_begin();
 
             // need to step E1 as well if not contigous assign (e.g. view)
-            if (!E1::contiguous_layout)
+            if (!E1::contiguous_layout && !e1_is_contiguous)
             {
                 res_stepper.to_begin();
                 for (std::size_t i = 0; i < idx.size(); ++i)
@@ -1260,7 +1262,7 @@ namespace xt
             fct_stepper.to_begin();
 
             // need to step E1 as well if not contigous assign (e.g. view)
-            if (!E1::contiguous_layout)
+            if (!E1::contiguous_layout && !e1_is_contiguous)
             {
                 res_stepper.to_begin();
                 for (std::size_t i = 0; i < idx.size(); ++i)
@@ -1305,7 +1307,7 @@ namespace xt
             fct_stepper.to_begin();
 
             // need to step E1 as well if not contigous assign (e.g. view)
-            if (!E1::contiguous_layout)
+            if (!E1::contiguous_layout && !e1_is_contiguous)
             {
                 res_stepper.to_begin();
                 for (std::size_t i = 0; i < idx.size(); ++i)