Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
cdca164
feat(vector): optimize vector_mix for vector4f with SSE
nfrechette Mar 1, 2025
35c55f1
feat(vector): document vector_mix SSE instruction cost and disable mo…
nfrechette Mar 2, 2025
0792035
style(vector): convert spaces to tabs
nfrechette Mar 2, 2025
c591a9e
feat(vector): optimize vector_mix for vector4f with NEON
nfrechette Mar 2, 2025
28fa0cf
fix(vector): rework ifdef to ensure everything is fully specified
nfrechette Mar 2, 2025
ff816df
feat(tools): add script to generate vector_mix specializations
nfrechette Mar 2, 2025
e1d9e0b
feat(vector): explicitly inline vector_dup variants to ensure optimal…
nfrechette Mar 2, 2025
87e7df7
feat(vector): use __builtin_shufflevector with gcc/clang
nfrechette Mar 2, 2025
1ad42c5
style(vector): fix indentation which triggers compiler warning
nfrechette Mar 2, 2025
2d8c8db
fix(tools): avoid including unnecessary headers in vector_mix unit tests
nfrechette Mar 2, 2025
b93ff8d
fix(vector): return a dummy value to avoid warnings
nfrechette Mar 2, 2025
15a7e90
feat(vector): optimize vector_mix for float64 with SSE
nfrechette Mar 2, 2025
1abdd93
fix(vector): check if __has_builtin exists before using it
nfrechette Mar 3, 2025
043647a
fix(vector): wrap __has_builtin for early GCC versions
nfrechette Mar 7, 2025
4abb0a9
tests(qvs): avoid VS2015 compiler code generation bug
nfrechette Mar 9, 2025
fe3853a
tests(qvv): avoid VS2015 compiler code generation bug
nfrechette Mar 9, 2025
1f29a6f
feat(vector): explicitly inline vector_dup variants to ensure optimal…
nfrechette Mar 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions includes/rtm/impl/compiler_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,13 @@
// Joins two pre-processor tokens: RTM_JOIN_TOKENS(foo, bar) yields 'foobar'
//////////////////////////////////////////////////////////////////////////
#define RTM_JOIN_TOKENS(a, b) a ## b

//////////////////////////////////////////////////////////////////////////
// RTM_HAS_BUILTIN(x) forwards to the __has_builtin(x) pre-processor macro
// when the compiler provides it. On compilers that do not define it
// (non-clang and early GCC compilers), it evaluates to 0 so that it can be
// used unconditionally in pre-processor expressions.
//////////////////////////////////////////////////////////////////////////
#if !defined(__has_builtin)
#define RTM_HAS_BUILTIN(x) 0
#else
#define RTM_HAS_BUILTIN(x) __has_builtin(x)
#endif
702 changes: 697 additions & 5 deletions includes/rtm/vector4d.h

Large diffs are not rendered by default.

548 changes: 433 additions & 115 deletions includes/rtm/vector4f.h

Large diffs are not rendered by default.

153 changes: 80 additions & 73 deletions tests/sources/test_qvs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,89 @@
using namespace rtm;

//////////////////////////////////////////////////////////////////////////
// Checks the QVS interpolation functions (qvs_lerp, qvs_slerp and their
// '_no_scale' variants) against reference transforms assembled from the
// per-component results of quat_lerp/quat_slerp, vector_lerp and scalar_lerp.
// Every function is exercised twice: once with the interpolation alpha as a
// FloatType and once as a ScalarType (the '_s' suffixed variables).
//
// TransformType cannot be deduced from the arguments and must be specified
// explicitly by the caller; FloatType is deduced from 'threshold'.
//
// threshold: tolerance forwarded to every near-equal comparison.
//
// NOTE(review): this appears to have been split out of test_qvs_impl to work
// around a VS2015 code generation bug -- confirm before merging it back.
//////////////////////////////////////////////////////////////////////////
template<typename TransformType, typename FloatType>
static void test_qvs_impl(const TransformType& identity, const FloatType threshold)
static void test_qvs_interpolation(const FloatType threshold)
{
using QuatType = typename related_types<FloatType>::quat;
using Vector4Type = typename related_types<FloatType>::vector4;
using ScalarType = typename related_types<FloatType>::scalar;

// Same interpolation factor in both of its supported representations
FloatType alpha = FloatType(0.33);
ScalarType alpha_s = scalar_set(alpha);
QuatType quat0 = quat_normalize(quat_from_euler(scalar_deg_to_rad(FloatType(30.0)), scalar_deg_to_rad(FloatType(-45.0)), scalar_deg_to_rad(FloatType(90.0))));
QuatType quat1 = quat_normalize(quat_from_euler(scalar_deg_to_rad(FloatType(45.0)), scalar_deg_to_rad(FloatType(60.0)), scalar_deg_to_rad(FloatType(120.0))));

// Reference rotations, one per interpolation mode and alpha representation
QuatType quat_ref_lerp = quat_lerp(quat0, quat1, alpha);
QuatType quat_ref_lerp_s = quat_lerp(quat0, quat1, alpha_s);
QuatType quat_ref_slerp = quat_slerp(quat0, quat1, alpha);
QuatType quat_ref_slerp_s = quat_slerp(quat0, quat1, alpha_s);

// NOTE(review): trans0 and trans1 hold identical values, so the translation
// checks below would still pass if the translation was not interpolated at
// all -- consider using distinct end points.
Vector4Type trans0 = vector_set(FloatType(-0.001138), FloatType(0.91623), FloatType(-1.624598));
Vector4Type trans1 = vector_set(FloatType(-0.001138), FloatType(0.91623), FloatType(-1.624598));

// Reference translations
Vector4Type trans_ref = vector_lerp(trans0, trans1, alpha);
Vector4Type trans_ref_s = vector_lerp(trans0, trans1, alpha_s);

FloatType scale0 = FloatType(-1.915);
FloatType scale1 = FloatType(-0.2113);

// Reference scales
FloatType scale_ref = scalar_lerp(scale0, scale1, alpha);
ScalarType scale_ref_s = scalar_lerp(scalar_set(scale0), scalar_set(scale1), alpha_s);

// The two end points being interpolated
TransformType transform0 = qvs_set(quat0, trans0, scale0);
TransformType transform1 = qvs_set(quat1, trans1, scale1);

// Expected results, lerp and slerp share the same translation and scale references
TransformType transform_ref_lerp = qvs_set(quat_ref_lerp, trans_ref, scale_ref);
TransformType transform_ref_slerp = qvs_set(quat_ref_slerp, trans_ref, scale_ref);

TransformType transform_ref_lerp_s = qvs_set(quat_ref_lerp_s, trans_ref_s, scale_ref_s);
TransformType transform_ref_slerp_s = qvs_set(quat_ref_slerp_s, trans_ref_s, scale_ref_s);

// Results under test
TransformType transform_lerp = qvs_lerp(transform0, transform1, alpha);
TransformType transform_lerp_s = qvs_lerp(transform0, transform1, alpha_s);

TransformType transform_slerp = qvs_slerp(transform0, transform1, alpha);
TransformType transform_slerp_s = qvs_slerp(transform0, transform1, alpha_s);

TransformType transform_lerp_no_scale = qvs_lerp_no_scale(transform0, transform1, alpha);
TransformType transform_lerp_no_scale_s = qvs_lerp_no_scale(transform0, transform1, alpha_s);

TransformType transform_slerp_no_scale = qvs_slerp_no_scale(transform0, transform1, alpha);
TransformType transform_slerp_no_scale_s = qvs_slerp_no_scale(transform0, transform1, alpha_s);

// qvs_lerp: rotation, translation and scale must all match the lerp reference
CHECK(quat_near_equal(transform_lerp.rotation, transform_ref_lerp.rotation, threshold));
CHECK(quat_near_equal(transform_lerp_s.rotation, transform_ref_lerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp), qvs_get_translation(transform_ref_lerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp_s), qvs_get_translation(transform_ref_lerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp), qvs_get_scale(transform_ref_lerp), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp_s), qvs_get_scale(transform_ref_lerp_s), threshold));

// qvs_slerp: rotation, translation and scale must all match the slerp reference
CHECK(quat_near_equal(transform_slerp.rotation, transform_ref_slerp.rotation, threshold));
CHECK(quat_near_equal(transform_slerp_s.rotation, transform_ref_slerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp), qvs_get_translation(transform_ref_slerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp_s), qvs_get_translation(transform_ref_slerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp), qvs_get_scale(transform_ref_slerp), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp_s), qvs_get_scale(transform_ref_slerp_s), threshold));

// qvs_lerp_no_scale: rotation and translation match the lerp reference while
// the scale is expected to equal the scale of transform0
CHECK(quat_near_equal(transform_lerp_no_scale.rotation, transform_ref_lerp.rotation, threshold));
CHECK(quat_near_equal(transform_lerp_no_scale_s.rotation, transform_ref_lerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp_no_scale), qvs_get_translation(transform_ref_lerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp_no_scale_s), qvs_get_translation(transform_ref_lerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp_no_scale), qvs_get_scale(transform0), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp_no_scale_s), qvs_get_scale(transform0), threshold));

// qvs_slerp_no_scale: rotation and translation match the slerp reference while
// the scale is expected to equal the scale of transform0
CHECK(quat_near_equal(transform_slerp_no_scale.rotation, transform_ref_slerp.rotation, threshold));
CHECK(quat_near_equal(transform_slerp_no_scale_s.rotation, transform_ref_slerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp_no_scale), qvs_get_translation(transform_ref_slerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp_no_scale_s), qvs_get_translation(transform_ref_slerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp_no_scale), qvs_get_scale(transform0), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp_no_scale_s), qvs_get_scale(transform0), threshold));
}

template<typename TransformType, typename FloatType>
static void test_qvs_impl(const TransformType& identity, const FloatType threshold)
{
using QuatType = typename related_types<FloatType>::quat;
using Vector4Type = typename related_types<FloatType>::vector4;
using Matrix3x3Type = typename related_types<FloatType>::matrix3x3;
using Matrix3x4Type = typename related_types<FloatType>::matrix3x4;

Expand Down Expand Up @@ -233,78 +311,7 @@ static void test_qvs_impl(const TransformType& identity, const FloatType thresho
CHECK(quat_is_normalized(transform_b.rotation, threshold));
}

{
FloatType alpha = FloatType(0.33);
ScalarType alpha_s = scalar_set(alpha);
QuatType quat0 = quat_normalize(quat_from_euler(scalar_deg_to_rad(FloatType(30.0)), scalar_deg_to_rad(FloatType(-45.0)), scalar_deg_to_rad(FloatType(90.0))));
QuatType quat1 = quat_normalize(quat_from_euler(scalar_deg_to_rad(FloatType(45.0)), scalar_deg_to_rad(FloatType(60.0)), scalar_deg_to_rad(FloatType(120.0))));

QuatType quat_ref_lerp = quat_lerp(quat0, quat1, alpha);
QuatType quat_ref_lerp_s = quat_lerp(quat0, quat1, alpha_s);
QuatType quat_ref_slerp = quat_slerp(quat0, quat1, alpha);
QuatType quat_ref_slerp_s = quat_slerp(quat0, quat1, alpha_s);

Vector4Type trans0 = vector_set(FloatType(-0.001138), FloatType(0.91623), FloatType(-1.624598));
Vector4Type trans1 = vector_set(FloatType(-0.001138), FloatType(0.91623), FloatType(-1.624598));

Vector4Type trans_ref = vector_lerp(trans0, trans1, alpha);
Vector4Type trans_ref_s = vector_lerp(trans0, trans1, alpha_s);

FloatType scale0 = FloatType(-1.915);
FloatType scale1 = FloatType(-0.2113);

FloatType scale_ref = scalar_lerp(scale0, scale1, alpha);
ScalarType scale_ref_s = scalar_lerp(scalar_set(scale0), scalar_set(scale1), alpha_s);

TransformType transform0 = qvs_set(quat0, trans0, scale0);
TransformType transform1 = qvs_set(quat1, trans1, scale1);

TransformType transform_ref_lerp = qvs_set(quat_ref_lerp, trans_ref, scale_ref);
TransformType transform_ref_slerp = qvs_set(quat_ref_slerp, trans_ref, scale_ref);

TransformType transform_ref_lerp_s = qvs_set(quat_ref_lerp_s, trans_ref_s, scale_ref_s);
TransformType transform_ref_slerp_s = qvs_set(quat_ref_slerp_s, trans_ref_s, scale_ref_s);

TransformType transform_lerp = qvs_lerp(transform0, transform1, alpha);
TransformType transform_lerp_s = qvs_lerp(transform0, transform1, alpha_s);

TransformType transform_slerp = qvs_slerp(transform0, transform1, alpha);
TransformType transform_slerp_s = qvs_slerp(transform0, transform1, alpha_s);

TransformType transform_lerp_no_scale = qvs_lerp_no_scale(transform0, transform1, alpha);
TransformType transform_lerp_no_scale_s = qvs_lerp_no_scale(transform0, transform1, alpha_s);

TransformType transform_slerp_no_scale = qvs_slerp_no_scale(transform0, transform1, alpha);
TransformType transform_slerp_no_scale_s = qvs_slerp_no_scale(transform0, transform1, alpha_s);

CHECK(quat_near_equal(transform_lerp.rotation, transform_ref_lerp.rotation, threshold));
CHECK(quat_near_equal(transform_lerp_s.rotation, transform_ref_lerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp), qvs_get_translation(transform_ref_lerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp_s), qvs_get_translation(transform_ref_lerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp), qvs_get_scale(transform_ref_lerp), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp_s), qvs_get_scale(transform_ref_lerp_s), threshold));

CHECK(quat_near_equal(transform_slerp.rotation, transform_ref_slerp.rotation, threshold));
CHECK(quat_near_equal(transform_slerp_s.rotation, transform_ref_slerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp), qvs_get_translation(transform_ref_slerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp_s), qvs_get_translation(transform_ref_slerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp), qvs_get_scale(transform_ref_slerp), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp_s), qvs_get_scale(transform_ref_slerp_s), threshold));

CHECK(quat_near_equal(transform_lerp_no_scale.rotation, transform_ref_lerp.rotation, threshold));
CHECK(quat_near_equal(transform_lerp_no_scale_s.rotation, transform_ref_lerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp_no_scale), qvs_get_translation(transform_ref_lerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_lerp_no_scale_s), qvs_get_translation(transform_ref_lerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp_no_scale), qvs_get_scale(transform0), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_lerp_no_scale_s), qvs_get_scale(transform0), threshold));

CHECK(quat_near_equal(transform_slerp_no_scale.rotation, transform_ref_slerp.rotation, threshold));
CHECK(quat_near_equal(transform_slerp_no_scale_s.rotation, transform_ref_slerp_s.rotation, threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp_no_scale), qvs_get_translation(transform_ref_slerp), threshold));
CHECK(vector_all_near_equal3(qvs_get_translation(transform_slerp_no_scale_s), qvs_get_translation(transform_ref_slerp_s), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp_no_scale), qvs_get_scale(transform0), threshold));
CHECK(scalar_near_equal(qvs_get_scale(transform_slerp_no_scale_s), qvs_get_scale(transform0), threshold));
}
test_qvs_interpolation<TransformType>(threshold);

{
const FloatType inf = std::numeric_limits<FloatType>::infinity();
Expand Down
Loading
Loading