Skip to content
Draft
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
711fe77
SIMD homogeneous matrix multiplication
SamFlt Nov 26, 2025
196660c
implement first version of simd for rotation matrix
SamFlt Nov 27, 2025
729110a
AVX implem for rotation matmul
SamFlt Dec 2, 2025
e230a72
Fix intrinsics usage, performance improvement
SamFlt Dec 2, 2025
1ff0b57
Simd version of the rbt dense depth, AVX matmul version for 3xN input…
SamFlt Dec 3, 2025
690c718
Export SIMD intrinsics utils in a separate header
SamFlt Dec 3, 2025
4c88d39
Fix test, improve vpRBDenseDepth
SamFlt Dec 4, 2025
77adf9d
Remove debug prints
SamFlt Dec 4, 2025
1f3c7c8
Move initVVS to cpp file, resize matrix there
SamFlt Dec 5, 2025
cc729b6
Add ENABLE_NATIVE_ARCH option for gcc
SamFlt Dec 5, 2025
a05c329
Remove reference to MBT tukey estimator, disable prints
SamFlt Dec 5, 2025
d847bfb
Fix SSE3 flag check
SamFlt Dec 6, 2025
c58271a
Merge branch 'master' into fast_homogeneous_proj
fspindle Dec 7, 2025
c4bc02a
Update copyright headers
fspindle Dec 7, 2025
49e8c83
Fix warning unused variable
fspindle Dec 7, 2025
ae5966b
Remove useless empty lines
fspindle Dec 7, 2025
3f3bac2
Fix warning variable set but not used
fspindle Dec 7, 2025
adc26ab
Remove vpSIMD namespace from doxygen doc
fspindle Dec 7, 2025
c6ff19a
Fix bug when input vector is not transposed and AVX or SSE2 not avail…
fspindle Dec 7, 2025
51e89a3
Fix bug when input vector is not transposed and AVX or SSE3 not avail…
fspindle Dec 7, 2025
09fc8cd
Cleanup tests to help debugging
fspindle Dec 7, 2025
f9f59e2
Fix _mm_hadd_pd() usage that requires SSE3 on pixi windows CI
fspindle Dec 7, 2025
06b044c
Merge branch 'master' into fast_homogeneous_proj
fspindle Dec 12, 2025
7502eac
Make vpSIMDUtils.h private to not expose SIMD code to the user
fspindle Dec 12, 2025
1f60797
Remove to make code more explicit
fspindle Dec 12, 2025
671b883
Make test independent from SIMD instruction set
fspindle Dec 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#############################################################################
#
# ViSP, open source Visual Servoing Platform software.
# Copyright (C) 2005 - 2024 by Inria. All rights reserved.
# Copyright (C) 2005 - 2025 by Inria. All rights reserved.
#
# This software is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -381,6 +381,9 @@ VP_OPTION(ENABLE_SSSE3 "" "" "Enable SSSE3 instructions" "" ON IF ((MSVC OR CMAK
if(X86_64)
VP_OPTION(ENABLE_AVX "" "" "Enable AVX instructions" "" OFF) # should be explicitly enabled, used in matrix transpose code
endif()
# Opt-in switch, only offered with the GNU C++ compiler and OFF by default.
# When ON, cmake/AddExtraCompilationFlags.cmake passes -march=native instead of
# the individual -msse2/-msse3/-mssse3/-mavx flags.
if(CMAKE_COMPILER_IS_GNUCXX)
VP_OPTION(ENABLE_NATIVE_ARCH "" "" "Enable all available CPU instruction sets" "" OFF)
endif()

#----------------------------------------------------------------------
# BLAS / LAPACK
Expand Down
37 changes: 21 additions & 16 deletions cmake/AddExtraCompilationFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -171,25 +171,30 @@ if(CMAKE_COMPILER_IS_GNUCXX)
add_extra_compiler_options("-fvisibility-inlines-hidden")
endif()

if(ENABLE_AVX AND X86_64)
add_extra_compiler_options("-mavx")
if(ENABLE_NATIVE_ARCH)
add_extra_compiler_options("-march=native")
else()
if(ENABLE_SSE2)
add_extra_compiler_options("-msse2")
elseif(X86 OR X86_64)
add_extra_compiler_options("-mno-sse2")
endif()

if(ENABLE_SSE3)
add_extra_compiler_options("-msse3")
elseif(X86 OR X86_64)
#add_extra_compiler_options("-mno-sse3")
endif()
if(ENABLE_AVX AND X86_64)
add_extra_compiler_options("-mavx")
else()
if(ENABLE_SSE2)
add_extra_compiler_options("-msse2")
elseif(X86 OR X86_64)
add_extra_compiler_options("-mno-sse2")
endif()

if(ENABLE_SSE3)
add_extra_compiler_options("-msse3")
elseif(X86 OR X86_64)
#add_extra_compiler_options("-mno-sse3")
endif()

if(ENABLE_SSSE3)
add_extra_compiler_options("-mssse3")
elseif(X86 OR X86_64)
add_extra_compiler_options("-mno-ssse3")
if(ENABLE_SSSE3)
add_extra_compiler_options("-mssse3")
elseif(X86 OR X86_64)
add_extra_compiler_options("-mno-ssse3")
endif()
endif()
endif()

Expand Down
2 changes: 2 additions & 0 deletions modules/core/include/visp3/core/vpHomogeneousMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ class VISP_EXPORT vpHomogeneousMatrix : public vpArray2D<double>
vpHomogeneousMatrix &operator<<(double val);
vpHomogeneousMatrix &operator,(double val);

/*!
 * NOTE(review): presumably applies this homogeneous transformation to every
 * point stored in \e inputPoints and writes the result to \e outputPoints;
 * \e isTransposed appears to select how the points are laid out in the
 * matrices (one per row versus one per column) - confirm against the
 * implementation before relying on it.
 */
void project(const vpMatrix &inputPoints, vpMatrix &outputPoints, bool isTransposed) const;

void orthogonalizeRotation();

void print() const;
Expand Down
2 changes: 2 additions & 0 deletions modules/core/include/visp3/core/vpRotationMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ class VISP_EXPORT vpRotationMatrix : public vpArray2D<double>

void printVector();

/*!
 * NOTE(review): presumably applies this rotation to every 3D vector stored in
 * \e inputs and writes the result to \e outputs; \e isTransposed appears to
 * select how the vectors are laid out in the matrices (one per row versus one
 * per column) - confirm against the implementation before relying on it.
 */
void rotateVectors(const vpMatrix &inputs, vpMatrix &outputs, bool isTransposed) const;

/*!
This function is not applicable to a rotation matrix that is always a
3-by-3 matrix.
Expand Down
231 changes: 231 additions & 0 deletions modules/core/include/visp3/core/vpSIMDUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*
* ViSP, open source Visual Servoing Platform software.
* Copyright (C) 2005 - 2025 by Inria. All rights reserved.
*
* This software is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* See the file LICENSE.txt at the root directory of this source
* distribution for additional information about the GNU GPL.
*
* For using ViSP with software that can not be combined with the GNU
* GPL, please contact Inria about acquiring a ViSP Professional
* Edition License.
*
* See https://visp.inria.fr for more information.
*
* This software was developed at:
* Inria Rennes - Bretagne Atlantique
* Campus Universitaire de Beaulieu
* 35042 Rennes Cedex
* France
*
* If you have questions regarding the use of this file, please contact
* Inria at visp@inria.fr
*
* This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
* WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Description:
* SIMD utilities.
*/

/*!
\file vpSIMDUtils.h
\brief Header that defines and includes useful SIMD routines and macros
*/

#ifndef VP_SIMD_UTILS_H
#define VP_SIMD_UTILS_H
#include <visp3/core/vpConfig.h>

#ifndef DOXYGEN_SHOULD_SKIP_THIS

// x86 / x86-64: pull in the intrinsics headers as soon as SSE2 is available
// (always the case on x64 with MSVC, hence the _M_X64 test).
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>

#define VISP_HAVE_SSE2 1
#endif

#if defined __AVX2__
#define VISP_HAVE_AVX2 1
#endif

#if defined __AVX__
#define VISP_HAVE_AVX 1
#endif

// MSVC never defines __FMA__, but its /arch:AVX2 switch does enable FMA code
// generation (https://stackoverflow.com/a/40765925), so FMA is inferred from
// __AVX2__ for that compiler only.
// GCC and Clang define __FMA__ themselves when FMA is enabled (-mfma or a
// matching -march); with these compilers -mavx2 alone does NOT enable FMA, and
// assuming it would make the _mm*_fmadd_pd() wrappers fail to compile.
#if !defined(__FMA__) && defined(__AVX2__) && defined(_MSC_VER) && !defined(__clang__)
#define __FMA__ 1
#endif


#if defined(__FMA__)
#define VISP_HAVE_FMA 1
#endif

// ARM 64-bit: NEON detection. VISP_HAVE_NEON is always defined (0 or 1).
#if defined _WIN32 && defined(_M_ARM64)
#define _ARM64_DISTINCT_NEON_TYPES
#include <Intrin.h>
#include <arm_neon.h>
#define VISP_HAVE_NEON 1
#elif (defined(__ARM_NEON__) || defined (__ARM_NEON)) && defined(__aarch64__)
#include <arm_neon.h>
#define VISP_HAVE_NEON 1
#else
#define VISP_HAVE_NEON 0
#endif

// USE_SSE / USE_NEON are 1 only when the instruction set is available AND
// USE_SIMD_CODE evaluates to non-zero; USE_SIMD_CODE is not defined in this
// header, so it has to be provided before including it (an undefined macro
// evaluates to 0 here).
#if VISP_HAVE_SSE2 && USE_SIMD_CODE
#define USE_SSE 1
#else
#define USE_SSE 0
#endif

#if VISP_HAVE_NEON && USE_SIMD_CODE
#define USE_NEON 1
#else
#define USE_NEON 0
#endif

/*!
 * Thin, width-agnostic wrappers around the x86 double-precision SIMD
 * intrinsics. The widest instruction set enabled at compile time is selected:
 * AVX-512F (8 doubles), then AVX (4 doubles), then SSE2 (2 doubles).
 *
 * Each branch provides the same names:
 * - \c Register : the native vector type,
 * - \c numLanes : number of doubles held by one \c Register,
 * - \c add, \c sub, \c mul : lane-wise a + b, a - b, a * b,
 * - \c fma : lane-wise a * b + c, using a fused instruction when
 *   \c VISP_HAVE_FMA is defined and a separate multiply and add otherwise,
 * - \c loadu / \c storeu : unaligned load from / store to \c numLanes doubles,
 * - \c set1 : broadcast one double to every lane.
 *
 * When none of the three instruction sets is enabled the namespace is empty.
 */
namespace vpSIMD
{
// The 512-bit __m512d type and the _mm512_* intrinsics belong to AVX-512F, not
// to AVX2: this branch must only be compiled when the compiler targets
// AVX-512F. AVX2-only builds use the 256-bit AVX branch below.
#if defined(__AVX512F__)
using Register = __m512d;
inline constexpr int numLanes = 8;

inline Register add(const Register a, const Register b)
{
  return _mm512_add_pd(a, b);
}

inline Register sub(const Register a, const Register b)
{
  return _mm512_sub_pd(a, b);
}

inline Register mul(const Register a, const Register b)
{
  return _mm512_mul_pd(a, b);
}

inline Register fma(const Register a, const Register b, const Register c)
{
#if defined(VISP_HAVE_FMA)
  return _mm512_fmadd_pd(a, b, c);
#else
  return add(mul(a, b), c);
#endif
}

inline Register loadu(const double *const data)
{
  return _mm512_loadu_pd(data);
}

inline Register set1(double v)
{
  return _mm512_set1_pd(v);
}

inline void storeu(double *data, const Register a)
{
  _mm512_storeu_pd(data, a);
}

#elif defined(VISP_HAVE_AVX)
using Register = __m256d;
inline constexpr int numLanes = 4;

inline Register add(const Register a, const Register b)
{
  return _mm256_add_pd(a, b);
}

inline Register sub(const Register a, const Register b)
{
  return _mm256_sub_pd(a, b);
}

inline Register mul(const Register a, const Register b)
{
  return _mm256_mul_pd(a, b);
}

inline Register fma(const Register a, const Register b, const Register c)
{
#if defined(VISP_HAVE_FMA)
  return _mm256_fmadd_pd(a, b, c);
#else
  return add(mul(a, b), c);
#endif
}

inline Register loadu(const double *const data)
{
  return _mm256_loadu_pd(data);
}

inline Register set1(double v)
{
  return _mm256_set1_pd(v);
}

inline void storeu(double *data, const Register a)
{
  _mm256_storeu_pd(data, a);
}

#elif VISP_HAVE_SSE2
using Register = __m128d;
inline constexpr int numLanes = 2;

inline Register add(const Register a, const Register b)
{
  return _mm_add_pd(a, b);
}

inline Register sub(const Register a, const Register b)
{
  return _mm_sub_pd(a, b);
}

inline Register mul(const Register a, const Register b)
{
  return _mm_mul_pd(a, b);
}

inline Register fma(const Register a, const Register b, const Register c)
{
#if defined(VISP_HAVE_FMA)
  return _mm_fmadd_pd(a, b, c);
#else
  return add(mul(a, b), c);
#endif
}

inline Register loadu(const double *const data)
{
  return _mm_loadu_pd(data);
}

inline Register set1(double v)
{
  return _mm_set1_pd(v);
}

inline void storeu(double *data, const Register a)
{
  _mm_storeu_pd(data, a);
}

#endif

} // namespace vpSIMD

#endif // DOXYGEN_SHOULD_SKIP_THIS
#endif // VP_SIMD_UTILS_H
9 changes: 6 additions & 3 deletions modules/core/src/math/robust/vpRobust.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ vpRobust &vpRobust::operator=(const vpRobust &&other)
// Resize the three internal work vectors (normalized residues, sorted
// normalized residues, sorted residues) to n_data elements and record the new
// size in m_size. Nothing is done when n_data already equals m_size.
void vpRobust::resize(unsigned int n_data)
{
if (n_data != m_size) {
m_normres.resize(n_data);
m_sorted_normres.resize(n_data);
m_sorted_residues.resize(n_data);
// NOTE(review): the second argument `false` presumably asks resize() not to
// re-initialize the elements - confirm against the vector type's resize().
m_normres.resize(n_data, false);
m_sorted_normres.resize(n_data, false);
m_sorted_residues.resize(n_data, false);
m_size = n_data;
}
}
Expand Down Expand Up @@ -143,6 +143,8 @@ void vpRobust::MEstimator(const vpRobustEstimatorType method, const vpColVector

// Calculate median
med = select(m_sorted_residues, 0, n_data - 1, ind_med);

// med = select(m_sorted_residues, 0, n_data - 1, ind_med);
// --comment: residualMedian = med

// Normalize residues
Expand All @@ -153,6 +155,7 @@ void vpRobust::MEstimator(const vpRobustEstimatorType method, const vpColVector

// Calculate MAD
normmedian = select(m_sorted_normres, 0, n_data - 1, ind_med);

// normalizedResidualMedian = normmedian ;
// 1.48 keeps scale estimate consistent for a normal probability dist.
m_mad = 1.4826 * normmedian; // median Absolute Deviation
Expand Down
Loading
Loading