From 87c83dd7307a261b8ef5f3182ce56323bbb69111 Mon Sep 17 00:00:00 2001 From: Nicholas Frechette Date: Wed, 26 Mar 2025 21:56:25 -0400 Subject: [PATCH 1/6] refactor(tests): rename test cpp --- tests/sources/{test_macros.cpp => test_macros_matrix.cpp} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests/sources/{test_macros.cpp => test_macros_matrix.cpp} (99%) diff --git a/tests/sources/test_macros.cpp b/tests/sources/test_macros_matrix.cpp similarity index 99% rename from tests/sources/test_macros.cpp rename to tests/sources/test_macros_matrix.cpp index c74ec011..59794cfb 100644 --- a/tests/sources/test_macros.cpp +++ b/tests/sources/test_macros_matrix.cpp @@ -27,7 +27,7 @@ #include #include -TEST_CASE("macros", "[math][macros]") +TEST_CASE("macros matrix", "[math][macros][matrix]") { const float threshold = 0.0F; // Result must be binary exact! From 14685a8b2943850ab08736d65e7903a379469365 Mon Sep 17 00:00:00 2001 From: Nicholas Frechette Date: Wed, 26 Mar 2025 22:08:45 -0400 Subject: [PATCH 2/6] feat(tests): add matrixd macro tests --- tests/sources/test_macros_matrix.cpp | 110 ++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp index 59794cfb..58f9f44c 100644 --- a/tests/sources/test_macros_matrix.cpp +++ b/tests/sources/test_macros_matrix.cpp @@ -25,9 +25,10 @@ #include "catch2.impl.h" #include +#include #include -TEST_CASE("macros matrix", "[math][macros][matrix]") +TEST_CASE("macros matrixf", "[math][macros][matrix]") { const float threshold = 0.0F; // Result must be binary exact! @@ -127,3 +128,110 @@ TEST_CASE("macros matrix", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0F, 21.0F, 22.0F), xxx, threshold)); } } + +TEST_CASE("macros matrixd", "[math][macros][matrix]") +{ + const double threshold = 0.0; // Result must be binary exact! + + { + rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0); + rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0); + rtm::vector4d xyz2 = rtm::vector_set(7.0, 8.0, 9.0); + + rtm::vector4d xxx; + rtm::vector4d yyy; + rtm::vector4d zzz; + RTM_MATRIXD_TRANSPOSE_3X3(xyz0, xyz1, xyz2, xxx, yyy, zzz); + + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xxx, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), yyy, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), zzz, threshold)); + + // Test when input == output + RTM_MATRIXD_TRANSPOSE_3X3(xyz0, xyz1, xyz2, xyz0, xyz1, xyz2); + + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xyz0, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), xyz1, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyz2, threshold)); + } + +#if 0 // not implemented yet + { + rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0); + rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0); + rtm::vector4d xyzw2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0); + rtm::vector4d xyzw3 = rtm::vector_set(10.0, 11.0, 12.0, 23.0); + + rtm::vector4d xxxx; + rtm::vector4d yyyy; + rtm::vector4d zzzz; + rtm::vector4d wwww; + RTM_MATRIXD_TRANSPOSE_4X4(xyzw0, xyzw1, xyzw2, xyzw3, xxxx, yyyy, zzzz, wwww); + + CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xxxx, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), yyyy, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), zzzz, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), wwww, threshold)); + + // Test when input == output + RTM_MATRIXD_TRANSPOSE_4X4(xyzw0, xyzw1, xyzw2, xyzw3, xyzw0, xyzw1, xyzw2, xyzw3); + + CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xyzw0, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyzw1, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyzw2, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold)); + } +#endif + +#if 0 // not implemented yet + { + rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0); + rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0); + rtm::vector4d xyz2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0); + rtm::vector4d xyz3 = rtm::vector_set(10.0, 11.0, 12.0, 23.0); + + rtm::vector4d xxxx; + rtm::vector4d yyyy; + rtm::vector4d zzzz; + RTM_MATRIXD_TRANSPOSE_4X3(xyz0, xyz1, xyz2, xyz3, xxxx, yyyy, zzzz); + + CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xxxx, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), yyyy, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), zzzz, threshold)); + + // Test when input == output + RTM_MATRIXD_TRANSPOSE_4X3(xyz0, xyz1, xyz2, xyz3, xyz0, xyz1, xyz2); + + CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xyz0, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyz1, threshold)); + CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold)); + } +#endif + +#if 0 // not implemented yet + { + rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0); + rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0); + rtm::vector4d xyzw2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0); + + rtm::vector4d xxx; + rtm::vector4d yyy; + rtm::vector4d zzz; + rtm::vector4d www; + RTM_MATRIXD_TRANSPOSE_3X4(xyzw0, xyzw1, xyzw2, xxx, yyy, zzz, www); + + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xxx, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), yyy, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), zzz, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), www, threshold)); + + // Test when input == output + RTM_MATRIXD_TRANSPOSE_3X4(xyzw0, xyzw1, xyzw2, xyzw0, xyzw1, xyzw2, xxx); + + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xyzw0, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), xyzw1, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyzw2, threshold)); + CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), xxx, threshold)); + } +#endif +} From c4d41365aa62839d8fa720a0664ff7ab97bd4ce0 Mon Sep 17 00:00:00 2001 From: Nicholas Frechette Date: Wed, 26 Mar 2025 22:13:18 -0400 Subject: [PATCH 3/6] feat(macros): add matrix transpose 2x2 macro --- includes/rtm/impl/macros.matrix.impl.h | 66 ++++++++++++++++++++++++++ tests/sources/test_macros_matrix.cpp | 36 ++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h index 631e3612..9da1ac72 100644 --- a/includes/rtm/impl/macros.matrix.impl.h +++ b/includes/rtm/impl/macros.matrix.impl.h @@ -185,6 +185,72 @@ RTM_IMPL_FILE_PRAGMA_PUSH } while(0) #endif +#if defined(RTM_NEON_INTRINSICS) + ////////////////////////////////////////////////////////////////////////// + // Transposes a 2x2 matrix. + // All inputs and outputs must be rtm::vector4f. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \ + do { \ + const float32x4_t x0x1y0y1 = vzip1q_f32((input_xy0), (input_xy1)); \ + (output_xx) = x0x1y0y1; \ + (output_yy) = vextq_f32(x0x1y0y1, x0x1y0y1, 2); \ + } while(0) +#elif defined(RTM_SSE2_INTRINSICS) + ////////////////////////////////////////////////////////////////////////// + // Transposes a 2x2 matrix. + // All inputs and outputs must be rtm::vector4f. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \ + do { \ + const __m128 x0x1y0y1 = _mm_unpacklo_ps((input_xy0), (input_xy1)); \ + (output_xx) = x0x1y0y1; \ + (output_yy) = _mm_movehl_ps(x0x1y0y1, x0x1y0y1); \ + } while(0) +#else + ////////////////////////////////////////////////////////////////////////// + // Transposes a 2x2 matrix. + // All inputs and outputs must be rtm::vector4f. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \ + do { \ + const float input_x0 = (input_xy0).x; \ + const float input_y0 = (input_xy0).y; \ + const float input_x1 = (input_xy1).x; \ + const float input_y1 = (input_xy1).y; \ + (output_xx) = RTM_IMPL_NAMESPACE::vector4f { input_x0, input_x1, input_x0, input_x1 }; \ + (output_yy) = RTM_IMPL_NAMESPACE::vector4f { input_y0, input_y1, input_y0, input_y1 }; \ + } while(0) +#endif + +#if defined(RTM_SSE2_INTRINSICS) + ////////////////////////////////////////////////////////////////////////// + // Transposes a 2x2 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \ + do { \ + const __m128d x0x1 = _mm_unpacklo_pd((input_xy0).xy, (input_xy1).xy); \ + const __m128d y0y1 = _mm_unpackhi_pd((input_xy0).xy, (input_xy1).xy); \ + (output_xx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x0x1 }; \ + (output_yy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y0y1 }; \ + } while(0) +#else + ////////////////////////////////////////////////////////////////////////// + // Transposes a 2x2 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \ + do { \ + const double input_x0 = (input_xy0).x; \ + const double input_y0 = (input_xy0).y; \ + const double input_x1 = (input_xy1).x; \ + const double input_y1 = (input_xy1).y; \ + (output_xx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x0, input_x1 }; \ + (output_yy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y0, input_y1 }; \ + } while(0) +#endif + #if defined(RTM_NEON_INTRINSICS) ////////////////////////////////////////////////////////////////////////// // Transposes a 4x3 matrix. diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp index 58f9f44c..76272819 100644 --- a/tests/sources/test_macros_matrix.cpp +++ b/tests/sources/test_macros_matrix.cpp @@ -32,6 +32,24 @@ TEST_CASE("macros matrixf", "[math][macros][matrix]") { const float threshold = 0.0F; // Result must be binary exact! + { + rtm::vector4f xy0 = rtm::vector_set(1.0F, 2.0F, 3.0F); + rtm::vector4f xy1 = rtm::vector_set(4.0F, 5.0F, 6.0F); + + rtm::vector4f xx; + rtm::vector4f yy; + RTM_MATRIXF_TRANSPOSE_2X2(xy0, xy1, xx, yy); + + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0F, 4.0F, 7.0F), xx, threshold)); + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0F, 5.0F, 8.0F), yy, threshold)); + + // Test when input == output + RTM_MATRIXF_TRANSPOSE_2X2(xy0, xy1, xy0, xy1); + + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0F, 4.0F, 7.0F), xy0, threshold)); + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0F, 5.0F, 8.0F), xy1, threshold)); + } + { rtm::vector4f xyz0 = rtm::vector_set(1.0F, 2.0F, 3.0F); rtm::vector4f xyz1 = rtm::vector_set(4.0F, 5.0F, 6.0F); @@ -133,6 +151,24 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") { const double threshold = 0.0; // Result must be binary exact! + { + rtm::vector4d xy0 = rtm::vector_set(1.0, 2.0, 3.0); + rtm::vector4d xy1 = rtm::vector_set(4.0, 5.0, 6.0); + + rtm::vector4d xx; + rtm::vector4d yy; + RTM_MATRIXD_TRANSPOSE_2X2(xy0, xy1, xx, yy); + + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0, 4.0, 7.0), xx, threshold)); + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0, 5.0, 8.0), yy, threshold)); + + // Test when input == output + RTM_MATRIXD_TRANSPOSE_2X2(xy0, xy1, xy0, xy1); + + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0, 4.0, 7.0), xy0, threshold)); + CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0, 5.0, 8.0), xy1, threshold)); + } + { rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0); rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0); From da15bd85877782bbec4b88e956101e6f7f265a20 Mon Sep 17 00:00:00 2001 From: Nicholas Frechette Date: Wed, 26 Mar 2025 22:22:13 -0400 Subject: [PATCH 4/6] feat(macros): add matrixd 4x4 transpose --- includes/rtm/impl/macros.matrix.impl.h | 50 ++++++++++++++++++++++++++ tests/sources/test_macros_matrix.cpp | 2 -- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h index 9da1ac72..853289ed 100644 --- a/includes/rtm/impl/macros.matrix.impl.h +++ b/includes/rtm/impl/macros.matrix.impl.h @@ -97,6 +97,56 @@ RTM_IMPL_FILE_PRAGMA_PUSH } while(0) #endif +#if defined(RTM_SSE2_INTRINSICS) + ////////////////////////////////////////////////////////////////////////// + // Transposes a 4x4 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_4X4(input_xyzw0, input_xyzw1, input_xyzw2, input_xyzw3, output_xxxx, output_yyyy, output_zzzz, output_wwww) \ + do { \ + const __m128d x0x1 = _mm_unpacklo_pd(input_xyzw0.xy, input_xyzw1.xy); \ + const __m128d y0y1 = _mm_unpackhi_pd(input_xyzw0.xy, input_xyzw1.xy); \ + const __m128d x2x3 = _mm_unpacklo_pd(input_xyzw2.xy, input_xyzw3.xy); \ + const __m128d y2y3 = _mm_unpackhi_pd(input_xyzw2.xy, input_xyzw3.xy); \ + const __m128d z0z1 = _mm_unpacklo_pd(input_xyzw0.zw, input_xyzw1.zw); \ + const __m128d w0w1 = _mm_unpackhi_pd(input_xyzw0.zw, input_xyzw1.zw); \ + const __m128d z2z3 = _mm_unpacklo_pd(input_xyzw2.zw, input_xyzw3.zw); \ + const __m128d w2w3 = _mm_unpackhi_pd(input_xyzw2.zw, input_xyzw3.zw); \ + (output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x2x3 }; \ + (output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y3 }; \ + (output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, z2z3 }; \ + (output_wwww) = RTM_IMPL_NAMESPACE::vector4d { w0w1, w2w3 }; \ + } while(0) +#else + ////////////////////////////////////////////////////////////////////////// + // Transposes a 4x4 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_4X4(input_xyzw0, input_xyzw1, input_xyzw2, input_xyzw3, output_xxxx, output_yyyy, output_zzzz, output_wwww) \ + do { \ + const double input_x0 = (input_xyzw0).x; \ + const double input_y0 = (input_xyzw0).y; \ + const double input_z0 = (input_xyzw0).z; \ + const double input_w0 = (input_xyzw0).w; \ + const double input_x1 = (input_xyzw1).x; \ + const double input_y1 = (input_xyzw1).y; \ + const double input_z1 = (input_xyzw1).z; \ + const double input_w1 = (input_xyzw1).w; \ + const double input_x2 = (input_xyzw2).x; \ + const double input_y2 = (input_xyzw2).y; \ + const double input_z2 = (input_xyzw2).z; \ + const double input_w2 = (input_xyzw2).w; \ + const double input_x3 = (input_xyzw3).x; \ + const double input_y3 = (input_xyzw3).y; \ + const double input_z3 = (input_xyzw3).z; \ + const double input_w3 = (input_xyzw3).w; \ + (output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x3 }; \ + (output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y3 }; \ + (output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z3 }; \ + (output_wwww) = RTM_IMPL_NAMESPACE::vector4d { input_w0, input_w1, input_w2, input_w3 }; \ + } while(0) +#endif + #if defined(RTM_NEON_INTRINSICS) ////////////////////////////////////////////////////////////////////////// // Transposes a 3x3 matrix. diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp index 76272819..d47e932f 100644 --- a/tests/sources/test_macros_matrix.cpp +++ b/tests/sources/test_macros_matrix.cpp @@ -191,7 +191,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyz2, threshold)); } -#if 0 // not implemented yet { rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0); rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0); @@ -217,7 +216,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyzw2, threshold)); CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold)); } -#endif #if 0 // not implemented yet { From 1766d960200c8bf17a5e18f6d4195fb284f7cabc Mon Sep 17 00:00:00 2001 From: Nicholas Frechette Date: Wed, 26 Mar 2025 22:26:34 -0400 Subject: [PATCH 5/6] feat(macros): add matrixd 4x3 transpose --- includes/rtm/impl/macros.matrix.impl.h | 42 ++++++++++++++++++++++++++ tests/sources/test_macros_matrix.cpp | 2 -- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h index 853289ed..86f8c242 100644 --- a/includes/rtm/impl/macros.matrix.impl.h +++ b/includes/rtm/impl/macros.matrix.impl.h @@ -356,6 +356,48 @@ RTM_IMPL_FILE_PRAGMA_PUSH } while(0) #endif +#if defined(RTM_SSE2_INTRINSICS) + ////////////////////////////////////////////////////////////////////////// + // Transposes a 4x3 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_4X3(input_xyz0, input_xyz1, input_xyz2, input_xyz3, output_xxxx, output_yyyy, output_zzzz) \ + do { \ + const __m128d x0x1 = _mm_unpacklo_pd(input_xyz0.xy, input_xyz1.xy); \ + const __m128d y0y1 = _mm_unpackhi_pd(input_xyz0.xy, input_xyz1.xy); \ + const __m128d x2x3 = _mm_unpacklo_pd(input_xyz2.xy, input_xyz3.xy); \ + const __m128d y2y3 = _mm_unpackhi_pd(input_xyz2.xy, input_xyz3.xy); \ + const __m128d z0z1 = _mm_unpacklo_pd(input_xyz0.zw, input_xyz1.zw); \ + const __m128d z2z3 = _mm_unpacklo_pd(input_xyz2.zw, input_xyz3.zw); \ + (output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x2x3 }; \ + (output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y3 }; \ + (output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, z2z3 }; \ + } while(0) +#else + ////////////////////////////////////////////////////////////////////////// + // Transposes a 4x3 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_4X3(input_xyz0, input_xyz1, input_xyz2, input_xyz3, output_xxxx, output_yyyy, output_zzzz) \ + do { \ + const double input_x0 = (input_xyz0).x; \ + const double input_y0 = (input_xyz0).y; \ + const double input_z0 = (input_xyz0).z; \ + const double input_x1 = (input_xyz1).x; \ + const double input_y1 = (input_xyz1).y; \ + const double input_z1 = (input_xyz1).z; \ + const double input_x2 = (input_xyz2).x; \ + const double input_y2 = (input_xyz2).y; \ + const double input_z2 = (input_xyz2).z; \ + const double input_x3 = (input_xyz3).x; \ + const double input_y3 = (input_xyz3).y; \ + const double input_z3 = (input_xyz3).z; \ + (output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x3 }; \ + (output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y3 }; \ + (output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z3 }; \ + } while(0) +#endif + #if defined(RTM_NEON_INTRINSICS) ////////////////////////////////////////////////////////////////////////// // Transposes a 3x4 matrix. diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp index d47e932f..412e2122 100644 --- a/tests/sources/test_macros_matrix.cpp +++ b/tests/sources/test_macros_matrix.cpp @@ -217,7 +217,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold)); } -#if 0 // not implemented yet { rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0); rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0); @@ -240,7 +239,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyz1, threshold)); CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold)); } -#endif #if 0 // not implemented yet { From d9a2c295b9b90ec7653091285bd86a993286178e Mon Sep 17 00:00:00 2001 From: Nicholas Frechette Date: Wed, 26 Mar 2025 22:33:23 -0400 Subject: [PATCH 6/6] feat(macros): add matrixd 3x4 transpose --- includes/rtm/impl/macros.matrix.impl.h | 44 ++++++++++++++++++++++++++ tests/sources/test_macros_matrix.cpp | 2 -- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h index 86f8c242..d7f3bc84 100644 --- a/includes/rtm/impl/macros.matrix.impl.h +++ b/includes/rtm/impl/macros.matrix.impl.h @@ -455,4 +455,48 @@ RTM_IMPL_FILE_PRAGMA_PUSH } while(0) #endif +#if defined(RTM_SSE2_INTRINSICS) + ////////////////////////////////////////////////////////////////////////// + // Transposes a 3x4 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_3X4(input_xyzw0, input_xyzw1, input_xyzw2, output_xxx, output_yyy, output_zzz, output_www) \ + do { \ + const __m128d x0x1 = _mm_unpacklo_pd(input_xyzw0.xy, input_xyzw1.xy); \ + const __m128d y0y1 = _mm_unpackhi_pd(input_xyzw0.xy, input_xyzw1.xy); \ + const __m128d y2y2 = _mm_unpackhi_pd(input_xyzw2.xy, input_xyzw2.xy); \ + const __m128d z0z1 = _mm_unpacklo_pd(input_xyzw0.zw, input_xyzw1.zw); \ + const __m128d w0w1 = _mm_unpackhi_pd(input_xyzw0.zw, input_xyzw1.zw); \ + const __m128d w2w2 = _mm_unpackhi_pd(input_xyzw2.zw, input_xyzw2.zw); \ + (output_xxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, input_xyzw2.xy }; \ + (output_yyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y2 }; \ + (output_zzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, input_xyzw2.zw }; \ + (output_www) = RTM_IMPL_NAMESPACE::vector4d { w0w1, w2w2 }; \ + } while(0) +#else + ////////////////////////////////////////////////////////////////////////// + // Transposes a 3x4 matrix. + // All inputs and outputs must be rtm::vector4d. + ////////////////////////////////////////////////////////////////////////// + #define RTM_MATRIXD_TRANSPOSE_3X4(input_xyzw0, input_xyzw1, input_xyzw2, output_xxx, output_yyy, output_zzz, output_www) \ + do { \ + const double input_x0 = (input_xyzw0).x; \ + const double input_y0 = (input_xyzw0).y; \ + const double input_z0 = (input_xyzw0).z; \ + const double input_w0 = (input_xyzw0).w; \ + const double input_x1 = (input_xyzw1).x; \ + const double input_y1 = (input_xyzw1).y; \ + const double input_z1 = (input_xyzw1).z; \ + const double input_w1 = (input_xyzw1).w; \ + const double input_x2 = (input_xyzw2).x; \ + const double input_y2 = (input_xyzw2).y; \ + const double input_z2 = (input_xyzw2).z; \ + const double input_w2 = (input_xyzw2).w; \ + (output_xxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x2 }; \ + (output_yyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y2 }; \ + (output_zzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z2 }; \ + (output_www) = RTM_IMPL_NAMESPACE::vector4d { input_w0, input_w1, input_w2, input_w2 }; \ + } while(0) +#endif + RTM_IMPL_FILE_PRAGMA_POP diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp index 412e2122..0874ee26 100644 --- a/tests/sources/test_macros_matrix.cpp +++ b/tests/sources/test_macros_matrix.cpp @@ -240,7 +240,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold)); } -#if 0 // not implemented yet { rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0); rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0); @@ -265,5 +264,4 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]") CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyzw2, threshold)); CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), xxx, threshold)); } -#endif }