Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions includes/rtm/impl/macros.matrix.impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,56 @@ RTM_IMPL_FILE_PRAGMA_PUSH
} while(0)
#endif

#if defined(RTM_SSE2_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 4x4 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_4X4(input_xyzw0, input_xyzw1, input_xyzw2, input_xyzw3, output_xxxx, output_yyyy, output_zzzz, output_wwww) \
do { \
const __m128d x0x1 = _mm_unpacklo_pd(input_xyzw0.xy, input_xyzw1.xy); \
const __m128d y0y1 = _mm_unpackhi_pd(input_xyzw0.xy, input_xyzw1.xy); \
const __m128d x2x3 = _mm_unpacklo_pd(input_xyzw2.xy, input_xyzw3.xy); \
const __m128d y2y3 = _mm_unpackhi_pd(input_xyzw2.xy, input_xyzw3.xy); \
const __m128d z0z1 = _mm_unpacklo_pd(input_xyzw0.zw, input_xyzw1.zw); \
const __m128d w0w1 = _mm_unpackhi_pd(input_xyzw0.zw, input_xyzw1.zw); \
const __m128d z2z3 = _mm_unpacklo_pd(input_xyzw2.zw, input_xyzw3.zw); \
const __m128d w2w3 = _mm_unpackhi_pd(input_xyzw2.zw, input_xyzw3.zw); \
(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x2x3 }; \
(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y3 }; \
(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, z2z3 }; \
(output_wwww) = RTM_IMPL_NAMESPACE::vector4d { w0w1, w2w3 }; \
} while(0)
#else
//////////////////////////////////////////////////////////////////////////
// Transposes a 4x4 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_4X4(input_xyzw0, input_xyzw1, input_xyzw2, input_xyzw3, output_xxxx, output_yyyy, output_zzzz, output_wwww) \
do { \
const double input_x0 = (input_xyzw0).x; \
const double input_y0 = (input_xyzw0).y; \
const double input_z0 = (input_xyzw0).z; \
const double input_w0 = (input_xyzw0).w; \
const double input_x1 = (input_xyzw1).x; \
const double input_y1 = (input_xyzw1).y; \
const double input_z1 = (input_xyzw1).z; \
const double input_w1 = (input_xyzw1).w; \
const double input_x2 = (input_xyzw2).x; \
const double input_y2 = (input_xyzw2).y; \
const double input_z2 = (input_xyzw2).z; \
const double input_w2 = (input_xyzw2).w; \
const double input_x3 = (input_xyzw3).x; \
const double input_y3 = (input_xyzw3).y; \
const double input_z3 = (input_xyzw3).z; \
const double input_w3 = (input_xyzw3).w; \
(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x3 }; \
(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y3 }; \
(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z3 }; \
(output_wwww) = RTM_IMPL_NAMESPACE::vector4d { input_w0, input_w1, input_w2, input_w3 }; \
} while(0)
#endif

#if defined(RTM_NEON_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 3x3 matrix.
Expand Down Expand Up @@ -185,6 +235,72 @@ RTM_IMPL_FILE_PRAGMA_PUSH
} while(0)
#endif

#if defined(RTM_NEON_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 2x2 matrix.
// All inputs and outputs must be rtm::vector4f.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
do { \
const float32x4_t x0x1y0y1 = vzip1q_f32((input_xy0), (input_xy1)); \
(output_xx) = x0x1y0y1; \
(output_yy) = vextq_f32(x0x1y0y1, x0x1y0y1, 2); \
} while(0)
#elif defined(RTM_SSE2_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 2x2 matrix.
// All inputs and outputs must be rtm::vector4f.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
do { \
const __m128 x0x1y0y1 = _mm_unpacklo_ps((input_xy0), (input_xy1)); \
(output_xx) = x0x1y0y1; \
(output_yy) = _mm_movehl_ps(x0x1y0y1, x0x1y0y1); \
} while(0)
#else
//////////////////////////////////////////////////////////////////////////
// Transposes a 2x2 matrix.
// All inputs and outputs must be rtm::vector4f.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
do { \
const float input_x0 = (input_xy0).x; \
const float input_y0 = (input_xy0).y; \
const float input_x1 = (input_xy1).x; \
const float input_y1 = (input_xy1).y; \
(output_xx) = RTM_IMPL_NAMESPACE::vector4f { input_x0, input_x1, input_x0, input_x1 }; \
(output_yy) = RTM_IMPL_NAMESPACE::vector4f { input_y0, input_y1, input_y0, input_y1 }; \
} while(0)
#endif

#if defined(RTM_SSE2_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 2x2 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
do { \
const __m128d x0x1 = _mm_unpacklo_pd((input_xy0).xy, (input_xy1).xy); \
const __m128d y0y1 = _mm_unpackhi_pd((input_xy0).xy, (input_xy1).xy); \
(output_xx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x0x1 }; \
(output_yy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y0y1 }; \
} while(0)
#else
//////////////////////////////////////////////////////////////////////////
// Transposes a 2x2 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
do { \
const double input_x0 = (input_xy0).x; \
const double input_y0 = (input_xy0).y; \
const double input_x1 = (input_xy1).x; \
const double input_y1 = (input_xy1).y; \
(output_xx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x0, input_x1 }; \
(output_yy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y0, input_y1 }; \
} while(0)
#endif

#if defined(RTM_NEON_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 4x3 matrix.
Expand Down Expand Up @@ -240,6 +356,48 @@ RTM_IMPL_FILE_PRAGMA_PUSH
} while(0)
#endif

#if defined(RTM_SSE2_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 4x3 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_4X3(input_xyz0, input_xyz1, input_xyz2, input_xyz3, output_xxxx, output_yyyy, output_zzzz) \
do { \
const __m128d x0x1 = _mm_unpacklo_pd(input_xyz0.xy, input_xyz1.xy); \
const __m128d y0y1 = _mm_unpackhi_pd(input_xyz0.xy, input_xyz1.xy); \
const __m128d x2x3 = _mm_unpacklo_pd(input_xyz2.xy, input_xyz3.xy); \
const __m128d y2y3 = _mm_unpackhi_pd(input_xyz2.xy, input_xyz3.xy); \
const __m128d z0z1 = _mm_unpacklo_pd(input_xyz0.zw, input_xyz1.zw); \
const __m128d z2z3 = _mm_unpacklo_pd(input_xyz2.zw, input_xyz3.zw); \
(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x2x3 }; \
(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y3 }; \
(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, z2z3 }; \
} while(0)
#else
//////////////////////////////////////////////////////////////////////////
// Transposes a 4x3 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_4X3(input_xyz0, input_xyz1, input_xyz2, input_xyz3, output_xxxx, output_yyyy, output_zzzz) \
do { \
const double input_x0 = (input_xyz0).x; \
const double input_y0 = (input_xyz0).y; \
const double input_z0 = (input_xyz0).z; \
const double input_x1 = (input_xyz1).x; \
const double input_y1 = (input_xyz1).y; \
const double input_z1 = (input_xyz1).z; \
const double input_x2 = (input_xyz2).x; \
const double input_y2 = (input_xyz2).y; \
const double input_z2 = (input_xyz2).z; \
const double input_x3 = (input_xyz3).x; \
const double input_y3 = (input_xyz3).y; \
const double input_z3 = (input_xyz3).z; \
(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x3 }; \
(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y3 }; \
(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z3 }; \
} while(0)
#endif

#if defined(RTM_NEON_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 3x4 matrix.
Expand Down Expand Up @@ -297,4 +455,48 @@ RTM_IMPL_FILE_PRAGMA_PUSH
} while(0)
#endif

#if defined(RTM_SSE2_INTRINSICS)
//////////////////////////////////////////////////////////////////////////
// Transposes a 3x4 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_3X4(input_xyzw0, input_xyzw1, input_xyzw2, output_xxx, output_yyy, output_zzz, output_www) \
do { \
const __m128d x0x1 = _mm_unpacklo_pd(input_xyzw0.xy, input_xyzw1.xy); \
const __m128d y0y1 = _mm_unpackhi_pd(input_xyzw0.xy, input_xyzw1.xy); \
const __m128d y2y2 = _mm_unpackhi_pd(input_xyzw2.xy, input_xyzw2.xy); \
const __m128d z0z1 = _mm_unpacklo_pd(input_xyzw0.zw, input_xyzw1.zw); \
const __m128d w0w1 = _mm_unpackhi_pd(input_xyzw0.zw, input_xyzw1.zw); \
const __m128d w2w2 = _mm_unpackhi_pd(input_xyzw2.zw, input_xyzw2.zw); \
(output_xxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, input_xyzw2.xy }; \
(output_yyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y2 }; \
(output_zzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, input_xyzw2.zw }; \
(output_www) = RTM_IMPL_NAMESPACE::vector4d { w0w1, w2w2 }; \
} while(0)
#else
//////////////////////////////////////////////////////////////////////////
// Transposes a 3x4 matrix.
// All inputs and outputs must be rtm::vector4d.
//////////////////////////////////////////////////////////////////////////
#define RTM_MATRIXD_TRANSPOSE_3X4(input_xyzw0, input_xyzw1, input_xyzw2, output_xxx, output_yyy, output_zzz, output_www) \
do { \
const double input_x0 = (input_xyzw0).x; \
const double input_y0 = (input_xyzw0).y; \
const double input_z0 = (input_xyzw0).z; \
const double input_w0 = (input_xyzw0).w; \
const double input_x1 = (input_xyzw1).x; \
const double input_y1 = (input_xyzw1).y; \
const double input_z1 = (input_xyzw1).z; \
const double input_w1 = (input_xyzw1).w; \
const double input_x2 = (input_xyzw2).x; \
const double input_y2 = (input_xyzw2).y; \
const double input_z2 = (input_xyzw2).z; \
const double input_w2 = (input_xyzw2).w; \
(output_xxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x2 }; \
(output_yyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y2 }; \
(output_zzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z2 }; \
(output_www) = RTM_IMPL_NAMESPACE::vector4d { input_w0, input_w1, input_w2, input_w2 }; \
} while(0)
#endif

RTM_IMPL_FILE_PRAGMA_POP
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,31 @@
#include "catch2.impl.h"

#include <rtm/macros.h>
#include <rtm/vector4d.h>
#include <rtm/vector4f.h>

TEST_CASE("macros", "[math][macros]")
TEST_CASE("macros matrixf", "[math][macros][matrix]")
{
const float threshold = 0.0F; // Result must be binary exact!

{
rtm::vector4f xy0 = rtm::vector_set(1.0F, 2.0F, 3.0F);
rtm::vector4f xy1 = rtm::vector_set(4.0F, 5.0F, 6.0F);

rtm::vector4f xx;
rtm::vector4f yy;
RTM_MATRIXF_TRANSPOSE_2X2(xy0, xy1, xx, yy);

CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0F, 4.0F, 7.0F), xx, threshold));
CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0F, 5.0F, 8.0F), yy, threshold));

// Test when input == output
RTM_MATRIXF_TRANSPOSE_2X2(xy0, xy1, xy0, xy1);

CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0F, 4.0F, 7.0F), xy0, threshold));
CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0F, 5.0F, 8.0F), xy1, threshold));
}

{
rtm::vector4f xyz0 = rtm::vector_set(1.0F, 2.0F, 3.0F);
rtm::vector4f xyz1 = rtm::vector_set(4.0F, 5.0F, 6.0F);
Expand Down Expand Up @@ -127,3 +146,122 @@ TEST_CASE("macros", "[math][macros]")
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0F, 21.0F, 22.0F), xxx, threshold));
}
}

TEST_CASE("macros matrixd", "[math][macros][matrix]")
{
const double threshold = 0.0; // Result must be binary exact!

{
rtm::vector4d xy0 = rtm::vector_set(1.0, 2.0, 3.0);
rtm::vector4d xy1 = rtm::vector_set(4.0, 5.0, 6.0);

rtm::vector4d xx;
rtm::vector4d yy;
RTM_MATRIXD_TRANSPOSE_2X2(xy0, xy1, xx, yy);

CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0, 4.0, 7.0), xx, threshold));
CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0, 5.0, 8.0), yy, threshold));

// Test when input == output
RTM_MATRIXD_TRANSPOSE_2X2(xy0, xy1, xy0, xy1);

CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0, 4.0, 7.0), xy0, threshold));
CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0, 5.0, 8.0), xy1, threshold));
}

{
rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0);
rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0);
rtm::vector4d xyz2 = rtm::vector_set(7.0, 8.0, 9.0);

rtm::vector4d xxx;
rtm::vector4d yyy;
rtm::vector4d zzz;
RTM_MATRIXD_TRANSPOSE_3X3(xyz0, xyz1, xyz2, xxx, yyy, zzz);

CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xxx, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), yyy, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), zzz, threshold));

// Test when input == output
RTM_MATRIXD_TRANSPOSE_3X3(xyz0, xyz1, xyz2, xyz0, xyz1, xyz2);

CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xyz0, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), xyz1, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyz2, threshold));
}

{
rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
rtm::vector4d xyzw2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0);
rtm::vector4d xyzw3 = rtm::vector_set(10.0, 11.0, 12.0, 23.0);

rtm::vector4d xxxx;
rtm::vector4d yyyy;
rtm::vector4d zzzz;
rtm::vector4d wwww;
RTM_MATRIXD_TRANSPOSE_4X4(xyzw0, xyzw1, xyzw2, xyzw3, xxxx, yyyy, zzzz, wwww);

CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xxxx, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), yyyy, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), zzzz, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), wwww, threshold));

// Test when input == output
RTM_MATRIXD_TRANSPOSE_4X4(xyzw0, xyzw1, xyzw2, xyzw3, xyzw0, xyzw1, xyzw2, xyzw3);

CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xyzw0, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyzw1, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyzw2, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold));
}

{
rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
rtm::vector4d xyz2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0);
rtm::vector4d xyz3 = rtm::vector_set(10.0, 11.0, 12.0, 23.0);

rtm::vector4d xxxx;
rtm::vector4d yyyy;
rtm::vector4d zzzz;
RTM_MATRIXD_TRANSPOSE_4X3(xyz0, xyz1, xyz2, xyz3, xxxx, yyyy, zzzz);

CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xxxx, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), yyyy, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), zzzz, threshold));

// Test when input == output
RTM_MATRIXD_TRANSPOSE_4X3(xyz0, xyz1, xyz2, xyz3, xyz0, xyz1, xyz2);

CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xyz0, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyz1, threshold));
CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold));
}

{
rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
rtm::vector4d xyzw2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0);

rtm::vector4d xxx;
rtm::vector4d yyy;
rtm::vector4d zzz;
rtm::vector4d www;
RTM_MATRIXD_TRANSPOSE_3X4(xyzw0, xyzw1, xyzw2, xxx, yyy, zzz, www);

CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xxx, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), yyy, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), zzz, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), www, threshold));

// Test when input == output
RTM_MATRIXD_TRANSPOSE_3X4(xyzw0, xyzw1, xyzw2, xyzw0, xyzw1, xyzw2, xxx);

CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xyzw0, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), xyzw1, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyzw2, threshold));
CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), xxx, threshold));
}
}
Loading